org.apache.beam.runners.core.construction.SerializablePipelineOptions Java Examples
The following examples show how to use
org.apache.beam.runners.core.construction.SerializablePipelineOptions.
Each example links back to the original project and source file named in its header.
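Before the examples, a minimal round-trip sketch may help: PipelineOptions itself is not serializable, so runners wrap it in SerializablePipelineOptions before shipping it to workers inside closures. This is only a sketch assuming a plain Java-serialization boundary; the class name SerializablePipelineOptionsRoundTrip and the job name are illustrative.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SerializablePipelineOptionsRoundTrip {
  public static void main(String[] args) throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setJobName("round-trip-demo");

    // Wrap the non-serializable PipelineOptions so it can travel with a serialized closure.
    SerializablePipelineOptions wrapped = new SerializablePipelineOptions(options);

    // Simulate the driver-to-worker hop with plain Java serialization.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(wrapped);
    }
    SerializablePipelineOptions restored;
    try (ObjectInputStream in =
        new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
      restored = (SerializablePipelineOptions) in.readObject();
    }

    // get() rebuilds the PipelineOptions on the receiving side.
    System.out.println(restored.get().getJobName()); // prints "round-trip-demo"
  }
}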
Example #1
Source File: FlinkDoFnFunction.java From beam with Apache License 2.0
public FlinkDoFnFunction(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions options,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.stepName = stepName;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
Example #2
Source File: GroupByWindowFunction.java From beam with Apache License 2.0
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  SdkComponents components = SdkComponents.create();
  options = new SerializablePipelineOptions(serializedOptions).get();
  try {
    windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(windowBytes);
    windowingStrategy =
        (WindowingStrategy<?, W>)
            WindowingStrategyTranslation.fromProto(
                windowStrategyProto.getWindowingStrategy(),
                RehydratedComponents.forComponents(components.toComponents()));
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  this.isInitialized = true;
}
Example #3
Source File: AssignWindowsFunction.java From beam with Apache License 2.0
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  options = new SerializablePipelineOptions(serializedOptions).get();
  try {
    RunnerApi.FunctionSpec windowFnProto = RunnerApi.FunctionSpec.parseFrom(windowFnBytes);
    windowFn =
        (WindowFn<T, BoundedWindow>)
            WindowingStrategyTranslation.windowFnFromProto(windowFnProto);
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  this.isInitialized = true;
}
Example #4
Source File: GroupByWindowFunction.java From beam with Apache License 2.0
public GroupByWindowFunction(
    WindowingStrategy<?, W> windowingStrategy,
    SystemReduceFn<K, V, Iterable<V>, Iterable<V>, W> reduceFn,
    PipelineOptions options) {
  this.windowingStrategy = windowingStrategy;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  try {
    windowStrategyProto =
        WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
    windowBytes = windowStrategyProto.toByteArray();
  } catch (IOException e) {
    LOG.info(e.getMessage());
  }
  this.reduceFn = reduceFn;
}
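Examples #2 and #4 are two halves of the same pattern: the constructor above captures the options as a JSON string via toString(), and initTransient() in Example #2 rebuilds them on the worker. Reduced to its essentials (a sketch, assuming options is any PipelineOptions instance):

// Driver side: capture the options as a JSON string that survives serialization.
String serializedOptions = new SerializablePipelineOptions(options).toString();

// Worker side: rebuild a live PipelineOptions from the captured string.
PipelineOptions restored = new SerializablePipelineOptions(serializedOptions).get();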
Example #5
Source File: ConfigBuilder.java From beam with Apache License 2.0
public Config build() {
  try {
    // apply framework configs
    config.putAll(createSystemConfig(options));

    // apply user configs
    config.putAll(createUserConfig(options));

    config.put(ApplicationConfig.APP_NAME, options.getJobName());
    config.put(ApplicationConfig.APP_ID, options.getJobInstance());
    config.put(JOB_NAME, options.getJobName());
    config.put(JOB_ID, options.getJobInstance());
    config.put(
        "beamPipelineOptions",
        Base64Serializer.serializeUnchecked(new SerializablePipelineOptions(options)));

    validateConfigs(options, config);

    return new MapConfig(config);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
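Reading the options back out of the Samza config is the mirror image of the put above; a sketch, assuming config is the MapConfig returned by build() (the deserializeUnchecked signature matches its use in Example #7):

// Decode the Base64 payload stored under "beamPipelineOptions" and rehydrate it.
String encoded = config.get("beamPipelineOptions");
SerializablePipelineOptions wrapped =
    Base64Serializer.deserializeUnchecked(encoded, SerializablePipelineOptions.class);
PipelineOptions pipelineOptions = wrapped.get();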
Example #6
Source File: FlinkStatefulDoFnFunction.java From beam with Apache License 2.0
public FlinkStatefulDoFnFunction(
    DoFn<KV<K, V>, OutputT> dofn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<KV<K, V>> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.dofn = dofn;
  this.stepName = stepName;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
Example #7
Source File: DatasetSourceBatch.java From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private DatasetReader(DataSourceOptions options) {
  if (!options.get(BEAM_SOURCE_OPTION).isPresent()) {
    throw new RuntimeException("Beam source was not set in DataSource options");
  }
  this.source =
      Base64Serializer.deserializeUnchecked(
          options.get(BEAM_SOURCE_OPTION).get(), BoundedSource.class);

  if (!options.get(DEFAULT_PARALLELISM).isPresent()) {
    throw new RuntimeException("Spark default parallelism was not set in DataSource options");
  }
  this.numPartitions = Integer.parseInt(options.get(DEFAULT_PARALLELISM).get());
  checkArgument(numPartitions > 0, "Number of partitions must be greater than zero.");

  if (!options.get(PIPELINE_OPTIONS).isPresent()) {
    throw new RuntimeException("Beam pipelineOptions were not set in DataSource options");
  }
  this.serializablePipelineOptions =
      new SerializablePipelineOptions(options.get(PIPELINE_OPTIONS).get());
}
Example #8
Source File: DatasetSourceStreaming.java From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private DatasetMicroBatchReader(String checkpointLocation, DataSourceOptions options) {
  if (!options.get(BEAM_SOURCE_OPTION).isPresent()) {
    throw new RuntimeException("Beam source was not set in DataSource options");
  }
  this.source =
      Base64Serializer.deserializeUnchecked(
          options.get(BEAM_SOURCE_OPTION).get(), UnboundedSource.class);

  if (!options.get(DEFAULT_PARALLELISM).isPresent()) {
    throw new RuntimeException("Spark default parallelism was not set in DataSource options");
  }
  this.numPartitions = Integer.parseInt(options.get(DEFAULT_PARALLELISM).get());
  checkArgument(numPartitions > 0, "Number of partitions must be greater than zero.");

  if (!options.get(PIPELINE_OPTIONS).isPresent()) {
    throw new RuntimeException("Beam pipelineOptions were not set in DataSource options");
  }
  this.serializablePipelineOptions =
      new SerializablePipelineOptions(options.get(PIPELINE_OPTIONS).get());
}
Example #9
Source File: DatasetSourceStreaming.java From beam with Apache License 2.0
DatasetPartitionReader(
    UnboundedSource<T, CheckpointMarkT> source,
    SerializablePipelineOptions serializablePipelineOptions) {
  this.started = false;
  this.closed = false;
  this.source = source;
  // reader is not serializable so lazy initialize it
  try {
    reader =
        // In
        // https://blog.yuvalitzchakov.com/exploring-stateful-streaming-with-spark-structured-streaming/
        // "Structured Streaming stores and retrieves the offsets on our behalf when re-running
        // the application meaning we no longer have to store them externally."
        source.createReader(serializablePipelineOptions.get(), null);
  } catch (IOException e) {
    throw new RuntimeException("Error creating UnboundedReader ", e);
  }
}
Example #10
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0
UpdateStateByKeyFunction(
    final List<Integer> sourceIds,
    final WindowingStrategy<?, W> windowingStrategy,
    final FullWindowedValueCoder<InputT> wvCoder,
    final Coder<K> keyCoder,
    final SerializablePipelineOptions options,
    final String logPrefix) {
  this.wvCoder = wvCoder;
  this.keyCoder = keyCoder;
  this.sourceIds = sourceIds;
  this.timerDataCoder = timerDataCoderOf(windowingStrategy);
  this.windowingStrategy = windowingStrategy;
  this.options = options;
  this.itrWvCoder = IterableCoder.of(wvCoder);
  this.logPrefix = logPrefix;
  this.wvKvIterCoder =
      windowedValueKeyValueCoderOf(
          keyCoder,
          wvCoder.getValueCoder(),
          ((FullWindowedValueCoder<InputT>) wvCoder).getWindowCoder());
}
Example #11
Source File: AbstractDoFnTransform.java From incubator-nemo with Apache License 2.0
/**
 * AbstractDoFnTransform constructor.
 *
 * @param doFn doFn
 * @param inputCoder input coder
 * @param outputCoders output coders
 * @param mainOutputTag main output tag
 * @param additionalOutputTags additional output tags
 * @param windowingStrategy windowing strategy
 * @param sideInputs side inputs
 * @param options pipeline options
 * @param displayData display data
 * @param doFnSchemaInformation DoFn schema information
 * @param sideInputMapping side input mapping
 */
public AbstractDoFnTransform(final DoFn<InterT, OutputT> doFn,
                             final Coder<InputT> inputCoder,
                             final Map<TupleTag<?>, Coder<?>> outputCoders,
                             final TupleTag<OutputT> mainOutputTag,
                             final List<TupleTag<?>> additionalOutputTags,
                             final WindowingStrategy<?, ?> windowingStrategy,
                             final Map<Integer, PCollectionView<?>> sideInputs,
                             final PipelineOptions options,
                             final DisplayData displayData,
                             final DoFnSchemaInformation doFnSchemaInformation,
                             final Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.displayData = displayData;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
Example #12
Source File: FlinkExecutableStageFunction.java From beam with Apache License 2.0
public FlinkExecutableStageFunction(
    String stepName,
    PipelineOptions pipelineOptions,
    RunnerApi.ExecutableStagePayload stagePayload,
    JobInfo jobInfo,
    Map<String, Integer> outputMap,
    FlinkExecutableStageContextFactory contextFactory,
    Coder windowCoder) {
  this.stepName = stepName;
  this.pipelineOptions = new SerializablePipelineOptions(pipelineOptions);
  this.stagePayload = stagePayload;
  this.jobInfo = jobInfo;
  this.outputMap = outputMap;
  this.contextFactory = contextFactory;
  this.windowCoder = windowCoder;
}
Example #13
Source File: SparkCombineFn.java From beam with Apache License 2.0
SparkCombineFn(
    boolean global,
    Function<InputT, ValueT> toValue,
    CombineWithContext.CombineFnWithContext<ValueT, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy) {
  this(
      global,
      toValue,
      combineFn,
      options,
      sideInputs,
      windowingStrategy,
      WindowedAccumulator.Type.EXPLODE_WINDOWS);
}
Example #14
Source File: SparkCombineFn.java From beam with Apache License 2.0
@VisibleForTesting
SparkCombineFn(
    boolean global,
    Function<InputT, ValueT> toValue,
    CombineWithContext.CombineFnWithContext<ValueT, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedAccumulator.Type defaultNonMergingCombineStrategy) {
  this.globalCombine = global;
  this.options = options;
  this.sideInputs = sideInputs;
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> castStrategy = (WindowingStrategy) windowingStrategy;
  this.windowingStrategy = castStrategy;
  this.toValue = toValue;
  this.defaultNonMergingCombineStrategy = defaultNonMergingCombineStrategy;
  this.combineFn = combineFn;
  @SuppressWarnings("unchecked")
  TypeDescriptor<BoundedWindow> untyped =
      (TypeDescriptor<BoundedWindow>)
          windowingStrategy.getWindowFn().getWindowTypeDescriptor();
  this.windowComparator = asWindowComparator(untyped);
}
Example #15
Source File: SourceRDD.java From beam with Apache License 2.0
public Bounded(
    SparkContext sc,
    BoundedSource<T> source,
    SerializablePipelineOptions options,
    String stepName) {
  super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag());
  this.source = source;
  this.options = options;
  // the input parallelism is determined by Spark's scheduler backend.
  // when running on YARN/SparkDeploy it's the result of max(totalCores, 2).
  // when running on Mesos it's 8.
  // when running local it's the total number of cores (local = 1, local[N] = N,
  // local[*] = estimation of the machine's cores).
  // ** the configuration "spark.default.parallelism" takes precedence over all of the above **
  this.numPartitions = sc.defaultParallelism();
  checkArgument(this.numPartitions > 0, "Number of partitions must be greater than zero.");
  this.bundleSize = options.get().as(SparkPipelineOptions.class).getBundleSize();
  this.stepName = stepName;
  this.metricsAccum = MetricsAccumulator.getInstance();
}
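The bundleSize line above shows the usual worker-side access idiom, which Example #18 repeats: get() rehydrates the options, and as(...) views them through a runner-specific interface. Isolated (a sketch, assuming options is a SerializablePipelineOptions captured on the driver):

// Rehydrate once, then narrow to the Spark-specific options interface.
SparkPipelineOptions sparkOptions = options.get().as(SparkPipelineOptions.class);
Long bundleSize = sparkOptions.getBundleSize();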
Example #16
Source File: FlinkPartialReduceFunction.java From beam with Apache License 2.0
public FlinkPartialReduceFunction(
    CombineFnBase.GlobalCombineFn<InputT, AccumT, ?> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    boolean groupedByWindow) {
  this.combineFn = combineFn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.groupedByWindow = groupedByWindow;
}
Example #17
Source File: SparkCombineFn.java From beam with Apache License 2.0
public static <InputT, AccumT, OutputT> SparkCombineFn<InputT, InputT, AccumT, OutputT> globally(
    CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy) {
  return new SparkCombineFn<>(true, e -> e, combineFn, options, sideInputs, windowingStrategy);
}
Example #18
Source File: SourceDStream.java From beam with Apache License 2.0
SourceDStream(
    StreamingContext ssc,
    UnboundedSource<T, CheckpointMarkT> unboundedSource,
    SerializablePipelineOptions options,
    Long boundMaxRecords) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.unboundedSource = unboundedSource;
  this.options = options;

  SparkPipelineOptions sparkOptions = options.get().as(SparkPipelineOptions.class);

  // Reader cache expiration interval. 50% of batch interval is added to accommodate latency.
  this.readerCacheInterval = 1.5 * sparkOptions.getBatchIntervalMillis();

  this.boundReadDuration =
      boundReadDuration(sparkOptions.getReadTimePercentage(), sparkOptions.getMinReadTimeMillis());
  // set initial parallelism once.
  this.initialParallelism = ssc().sparkContext().defaultParallelism();
  checkArgument(this.initialParallelism > 0, "Number of partitions must be greater than zero.");

  this.boundMaxRecords = boundMaxRecords;

  try {
    this.numPartitions = createMicrobatchSource().split(sparkOptions).size();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Example #19
Source File: SourceRDD.java From beam with Apache License 2.0
public Unbounded(
    SparkContext sc,
    SerializablePipelineOptions options,
    MicrobatchSource<T, CheckpointMarkT> microbatchSource,
    int initialNumPartitions) {
  super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag());
  this.options = options;
  this.microbatchSource = microbatchSource;
  this.partitioner = new HashPartitioner(initialNumPartitions);
}
Example #20
Source File: FlinkMergingNonShuffleReduceFunction.java From beam with Apache License 2.0
public FlinkMergingNonShuffleReduceFunction(
    CombineFnBase.GlobalCombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions) {
  this.combineFn = combineFn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
}
Example #21
Source File: SparkCombineFn.java From beam with Apache License 2.0
public static <K, V, AccumT, OutputT> SparkCombineFn<KV<K, V>, V, AccumT, OutputT> keyed(
    CombineWithContext.CombineFnWithContext<V, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy) {
  return new SparkCombineFn<>(
      false, KV::getValue, combineFn, options, sideInputs, windowingStrategy);
}
Example #22
Source File: MultiDoFnFunction.java From beam with Apache License 2.0
/**
 * @param metricsAccum The Spark {@link AccumulatorV2} that backs the Beam metrics.
 * @param stepName The name of the step.
 * @param doFn The {@link DoFn} to be wrapped.
 * @param options The {@link SerializablePipelineOptions}.
 * @param mainOutputTag The main output {@link TupleTag}.
 * @param additionalOutputTags Additional {@link TupleTag output tags}.
 * @param inputCoder The coder for the input.
 * @param outputCoders A map of all output coders.
 * @param sideInputs Side inputs used in this {@link DoFn}.
 * @param windowingStrategy Input {@link WindowingStrategy}.
 * @param stateful Stateful {@link DoFn}.
 * @param doFnSchemaInformation The {@link DoFnSchemaInformation} for the {@link DoFn}.
 * @param sideInputMapping A map of side input names to {@link PCollectionView views}.
 */
public MultiDoFnFunction(
    MetricsContainerStepMapAccumulator metricsAccum,
    String stepName,
    DoFn<InputT, OutputT> doFn,
    SerializablePipelineOptions options,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy,
    boolean stateful,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.metricsAccum = metricsAccum;
  this.stepName = stepName;
  this.doFn = SerializableUtils.clone(doFn);
  this.options = options;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.sideInputs = sideInputs;
  this.windowingStrategy = windowingStrategy;
  this.stateful = stateful;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
Example #23
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0
public static <K, InputT, W extends BoundedWindow>
    JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> groupByKeyAndWindow(
        final JavaDStream<WindowedValue<KV<K, InputT>>> inputDStream,
        final Coder<K> keyCoder,
        final Coder<WindowedValue<InputT>> wvCoder,
        final WindowingStrategy<?, W> windowingStrategy,
        final SerializablePipelineOptions options,
        final List<Integer> sourceIds,
        final String transformFullName) {
  final PairDStreamFunctions<ByteArray, byte[]> pairDStream =
      buildPairDStream(inputDStream, keyCoder, wvCoder);

  // use updateStateByKey to scan through the state and update elements and timers.
  final UpdateStateByKeyFunction<K, InputT, W> updateFunc =
      new UpdateStateByKeyFunction<>(
          sourceIds,
          windowingStrategy,
          (FullWindowedValueCoder<InputT>) wvCoder,
          keyCoder,
          options,
          transformFullName);

  final DStream<
          Tuple2</*K*/ ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>
      firedStream =
          pairDStream.updateStateByKey(
              updateFunc,
              pairDStream.defaultPartitioner(pairDStream.defaultPartitioner$default$1()),
              true,
              JavaSparkContext$.MODULE$.fakeClassTag());

  checkpointIfNeeded(firedStream, options);

  // filter state-only output (nothing to fire) and remove the state from the output.
  return stripStateValues(firedStream, keyCoder, (FullWindowedValueCoder<InputT>) wvCoder);
}
Example #24
Source File: AssignWindowsFunction.java From beam with Apache License 2.0
public AssignWindowsFunction(WindowFn<T, BoundedWindow> windowFn, PipelineOptions options) {
  this.windowFn = windowFn;
  SdkComponents components = SdkComponents.create();
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  RunnerApi.FunctionSpec windowFnProto =
      WindowingStrategyTranslation.toProto(windowFn, components);
  windowFnBytes = windowFnProto.toByteArray();
}
Example #25
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0
private static void checkpointIfNeeded(
    final DStream<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> firedStream,
    final SerializablePipelineOptions options) {

  final Long checkpointDurationMillis = getBatchDuration(options);

  if (checkpointDurationMillis > 0) {
    firedStream.checkpoint(new Duration(checkpointDurationMillis));
  }
}
Example #26
Source File: DoFnFunction.java From beam with Apache License 2.0
public DoFnFunction(
    Twister2TranslationContext context,
    DoFn<InputT, OutputT> doFn,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    List<TupleTag<?>> sideOutputs,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, Integer> outputMap,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.pipelineOptions = context.getOptions();
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = convertToTuples(sideInputs);
  this.mainOutput = mainOutput;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideOutputs = sideOutputs;
  this.stepcontext = new NoOpStepContext();
  this.outputMap = outputMap;
  this.sideInputMapping = sideInputMapping;
  outputManager = new DoFnOutputManager(this.outputMap);
  prepareSerialization();
}
Example #27
Source File: Twister2BoundedSource.java From beam with Apache License 2.0
public Twister2BoundedSource(
    BoundedSource<T> boundedSource, Twister2TranslationContext context, PipelineOptions options) {
  source = boundedSource;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  RunnerApi.FunctionSpec sourceProto = ReadTranslation.toProto(source, components);
  sourceBytes = sourceProto.getPayload().toByteArray();
}
Example #28
Source File: GroupAlsoByWindowViaOutputBufferFn.java From beam with Apache License 2.0
public GroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    SerializablePipelineOptions options) {
  this.windowingStrategy = windowingStrategy;
  this.stateInternalsFactory = stateInternalsFactory;
  this.reduceFn = reduceFn;
  this.options = options;
}
Example #29
Source File: AbstractParDoP.java From beam with Apache License 2.0
AbstractParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  this.pipelineOptions = pipelineOptions;
  this.doFn = Utils.serde(doFn);
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.outputCollToOrdinals = outputCollToOrdinals;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.sideInputCoders =
      sideInputCoders.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  e ->
                      Utils.deriveIterableValueCoder(
                          (WindowedValue.FullWindowedValueCoder) e.getValue())));
  this.outputCoders = outputCoders;
  this.inputValueCoder = inputValueCoder;
  this.outputValueCoders = outputValueCoders;
  this.ordinalToSideInput = ordinalToSideInput;
  this.ownerId = ownerId;
  this.stepId = stepId;
  this.cooperative = isCooperativenessAllowed(pipelineOptions) && hasOutput();
}
Example #30
Source File: FlinkReduceFunction.java From beam with Apache License 2.0
public FlinkReduceFunction(
    CombineFnBase.GlobalCombineFn<?, AccumT, OutputT> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    boolean groupedByWindow) {
  this.combineFn = combineFn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.groupedByWindow = groupedByWindow;
}