Java Code Examples for org.apache.beam.sdk.values.PCollectionList#of()
The following examples show how to use org.apache.beam.sdk.values.PCollectionList#of().
They are drawn from open-source projects; the project and source file are listed above each example.
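Before the project examples, here is a minimal, self-contained sketch of the typical pattern: PCollectionList.of() wraps one or more PCollections of the same element type so they can be fed into a multi-input transform such as Flatten. The class name and element values below are illustrative, not taken from any of the projects; running it assumes a runner (e.g. the direct runner) is on the classpath.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

public class PCollectionListOfSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();

    PCollection<String> first = pipeline.apply("First", Create.of("a", "b"));
    PCollection<String> second = pipeline.apply("Second", Create.of("c", "d"));

    // of() starts the list from a single PCollection (an overload accepts an
    // Iterable<PCollection<T>>); and() appends another PCollection.
    PCollectionList<String> list = PCollectionList.of(first).and(second);

    // A PCollectionList is the input type of multi-input transforms like Flatten.
    PCollection<String> merged = list.apply(Flatten.pCollections());

    pipeline.run().waitUntilFinish();
  }
}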
Example 1
Source File: PartitionTest.java, from beam, Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testDroppedPartition() {
  // Compute the set of integers either 1 or 2 mod 3, the hard way.
  PCollectionList<Integer> outputs =
      pipeline
          .apply(Create.of(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))
          .apply(Partition.of(3, new ModFn()));

  List<PCollection<Integer>> outputsList = new ArrayList<>(outputs.getAll());
  outputsList.remove(0);
  outputs = PCollectionList.of(outputsList);
  assertTrue(outputs.size() == 2);

  PCollection<Integer> output = outputs.apply(Flatten.pCollections());
  PAssert.that(output).containsInAnyOrder(2, 4, 5, 7, 8, 10, 11);
  pipeline.run();
}
Example 2
Source File: TransformTreeTest.java, from beam, Apache License 2.0
@Override
public PCollectionList<String> expand(PBegin b) {
  // Composite transform: apply delegates to other transformations,
  // here a Create transform.
  PCollection<String> result = b.apply(Create.of("hello", "world"));

  // Issue below: PCollection.createPrimitiveOutput should not be used
  // from within a composite transform.
  return PCollectionList.of(
      Arrays.asList(
          result,
          PCollection.createPrimitiveOutputInternal(
              b.getPipeline(),
              WindowingStrategy.globalDefault(),
              result.isBounded(),
              StringUtf8Coder.of())));
}
Example 3
Source File: WriteFeatureSetSpecAckTest.java, from feast, Apache License 2.0
@Test
public void shouldSendAckWhenAllSinksReady() {
  TestStream<FeatureSetReference> sink1 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .addElements(FeatureSetReference.of("project", "fs", 1))
          .addElements(FeatureSetReference.of("project", "fs", 2))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  TestStream<FeatureSetReference> sink2 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .addElements(FeatureSetReference.of("project", "fs_2", 1))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  TestStream<FeatureSetReference> sink3 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .advanceProcessingTime(Duration.standardSeconds(10))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  PCollectionList<FeatureSetReference> sinks =
      PCollectionList.of(
          ImmutableList.of(
              p.apply("sink1", sink1), p.apply("sink2", sink2), p.apply("sink3", sink3)));

  PCollection<FeatureSetReference> grouped =
      sinks.apply(Flatten.pCollections()).apply(new WriteFeatureSetSpecAck.PrepareWrite(3));

  PAssert.that(grouped)
      .inOnTimePane(GlobalWindow.INSTANCE)
      .containsInAnyOrder(FeatureSetReference.of("project", "fs", 3));

  p.run();
}
Example 4
Source File: Join.java, from beam, Apache License 2.0
@Override
public PCollection<KV<KeyT, OutputT>> output(OutputHint... outputHints) {
  @SuppressWarnings("unchecked")
  final PCollectionList<Object> inputs =
      PCollectionList.of(Arrays.asList((PCollection) left, (PCollection) right));
  return OperatorTransform.apply(createOperator(), inputs);
}
Example 5
Source File: Join.java, from beam, Apache License 2.0
@Override
public PCollection<OutputT> outputValues(OutputHint... outputHints) {
  @SuppressWarnings("unchecked")
  final PCollectionList<Object> inputs =
      PCollectionList.of(Arrays.asList((PCollection) left, (PCollection) right));
  return OperatorTransform.apply(
      new OutputValues<>(name, outputType, createOperator()), inputs);
}
Example 6
Source File: BeamSqlRelUtils.java, from beam, Apache License 2.0
/** Transforms the inputs into a PInput. */
private static PCollectionList<Row> buildPCollectionList(
    List<RelNode> inputRels, Pipeline pipeline, Map<Integer, PCollection<Row>> cache) {
  if (inputRels.isEmpty()) {
    return PCollectionList.empty(pipeline);
  } else {
    return PCollectionList.of(
        inputRels.stream()
            .map(input -> BeamSqlRelUtils.toPCollection(pipeline, (BeamRelNode) input, cache))
            .collect(Collectors.toList()));
  }
}
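Example 6 also shows the companion entry point PCollectionList.empty(pipeline), the natural starting point when the number of inputs is only known at runtime. Here is a minimal sketch of that incremental pattern, with a hypothetical helper name (fromInputs) and assuming the usual Beam imports (Pipeline, PCollection, PCollectionList, Row):

/** Illustrative only: builds a PCollectionList from a runtime-sized set of inputs. */
private static PCollectionList<Row> fromInputs(Pipeline pipeline, List<PCollection<Row>> inputs) {
  PCollectionList<Row> list = PCollectionList.empty(pipeline);
  for (PCollection<Row> input : inputs) {
    // and() returns a new, extended list; PCollectionList itself is immutable.
    list = list.and(input);
  }
  return list;
}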
Example 7
Source File: FlattenTest.java, from beam, Apache License 2.0
private <T> PCollectionList<T> makePCollectionList(
    Pipeline p, Coder<T> coder, List<List<T>> lists) {
  List<PCollection<T>> pcs = new ArrayList<>();
  int index = 0;
  for (List<T> list : lists) {
    PCollection<T> pc = p.apply("Create" + (index++), Create.of(list).withCoder(coder));
    pcs.add(pc);
  }
  return PCollectionList.of(pcs);
}
Example 8
Source File: TransformHierarchyTest.java, from beam, Apache License 2.0
@Test
public void emptyCompositeSucceeds() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  TransformHierarchy.Node node = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  TransformHierarchy.Node emptyTransform =
      hierarchy.pushNode(
          "Extract",
          pcList,
          new PTransform<PCollectionList<Long>, PCollection<Long>>() {
            @Override
            public PCollection<Long> expand(PCollectionList<Long> input) {
              return input.get(0);
            }
          });
  hierarchy.setOutput(created);
  hierarchy.popNode();

  assertThat(hierarchy.getProducer(created), equalTo(node));
  assertThat(
      "A Transform that produces non-primitive output should be composite",
      emptyTransform.isCompositeNode(),
      is(true));
}
Example 9
Source File: TransformHierarchyTest.java, from beam, Apache License 2.0
@Test
public void producingOwnAndOthersOutputsFails() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  final PCollectionList<Long> appended =
      pcList.and(
          PCollection.createPrimitiveOutputInternal(
                  pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of())
              .setName("prim"));
  hierarchy.pushNode(
      "AddPc",
      pcList,
      new PTransform<PCollectionList<Long>, PCollectionList<Long>>() {
        @Override
        public PCollectionList<Long> expand(PCollectionList<Long> input) {
          return appended;
        }
      });

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("contains a primitive POutput produced by it");
  thrown.expectMessage("AddPc");
  thrown.expectMessage("Create");
  thrown.expectMessage(appended.expand().toString());
  hierarchy.setOutput(appended);
}
Example 10
Source File: FixedInputRuntime.java, from components, Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
  FixedDatasetRuntime runtime = new FixedDatasetRuntime();
  runtime.initialize(null, properties.getDatasetProperties());

  // The values to include in the PCollection
  List<IndexedRecord> values = new LinkedList<>();

  if (properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.NONE
      || properties.overrideValuesAction.getValue()
          == FixedInputProperties.OverrideValuesAction.APPEND) {
    if (!properties.getDatasetProperties().values.getValue().trim().isEmpty()) {
      values.addAll(runtime.getValues(Integer.MAX_VALUE));
    }
  }

  if (properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.APPEND
      || properties.overrideValuesAction.getValue()
          == FixedInputProperties.OverrideValuesAction.REPLACE) {
    properties.getDatasetProperties().values.setValue(properties.overrideValues.getValue());
    if (!properties.getDatasetProperties().values.getValue().trim().isEmpty()) {
      values.addAll(runtime.getValues(Integer.MAX_VALUE));
    }
  }

  if (values.size() != 0) {
    PCollection<IndexedRecord> out =
        (PCollection<IndexedRecord>)
            begin.apply(
                Create.of(values).withCoder((AvroCoder) AvroCoder.of(runtime.getSchema())));
    if (properties.repeat.getValue() > 1) {
      PCollectionList<IndexedRecord> merged = PCollectionList.of(out);
      for (int i = 2; i < properties.repeat.getValue(); i++) {
        merged = merged.and(out);
      }
      out = merged.apply(Flatten.<IndexedRecord> pCollections());
    }
    return out;
  } else {
    return begin.apply(RowGeneratorIO.read().withSchema(runtime.getSchema()) //
        .withSeed(0L) //
        .withPartitions(1) //
        .withRows(properties.repeat.getValue()));
  }
}
Example 11
Source File: BatchViewOverrides.java, from beam, Apache License 2.0
private static <K, V, W extends BoundedWindow, ViewT> PCollection<?> applyForMapLike(
    DataflowRunner runner,
    PCollection<KV<K, V>> input,
    PCollectionView<ViewT> view,
    boolean uniqueKeysExpected)
    throws NonDeterministicException {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();

  // If our key coder is deterministic, we can use the key portion of each KV as
  // part of a composite key containing the window, key and index.
  inputCoder.getKeyCoder().verifyDeterministic();

  IsmRecordCoder<WindowedValue<V>> ismCoder =
      coderForMapLike(windowCoder, inputCoder.getKeyCoder(), inputCoder.getValueCoder());

  // Create the various output tags representing the main output containing the data stream
  // and the additional outputs containing the metadata about the size and entry set.
  TupleTag<IsmRecord<WindowedValue<V>>> mainOutputTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, Long>>> outputForSizeTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, K>>> outputForEntrySetTag = new TupleTag<>();

  // Process all the elements grouped by key hash, and sorted by key and then window,
  // outputting to all the outputs defined above.
  PCollectionTuple outputTuple =
      input
          .apply("GBKaSVForData", new GroupByKeyHashAndSortByKeyAndWindow<K, V, W>(ismCoder))
          .apply(
              ParDo.of(
                      new ToIsmRecordForMapLikeDoFn<>(
                          outputForSizeTag,
                          outputForEntrySetTag,
                          windowCoder,
                          inputCoder.getKeyCoder(),
                          ismCoder,
                          uniqueKeysExpected))
                  .withOutputTags(
                      mainOutputTag,
                      TupleTagList.of(ImmutableList.of(outputForSizeTag, outputForEntrySetTag))));

  // Set the coder on the main data output.
  PCollection<IsmRecord<WindowedValue<V>>> perHashWithReifiedWindows =
      outputTuple.get(mainOutputTag);
  perHashWithReifiedWindows.setCoder(ismCoder);

  // Set the coder on the metadata output for size and process the entries,
  // producing a [META, Window, 0L] record per window storing the number of unique keys
  // for each window.
  PCollection<KV<Integer, KV<W, Long>>> outputForSize = outputTuple.get(outputForSizeTag);
  outputForSize.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, VarLongCoder.of())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapSizeMetadata =
      outputForSize
          .apply("GBKaSVForSize", new GroupByKeyAndSortValuesOnly<>())
          .apply(ParDo.of(new ToIsmMetadataRecordForSizeDoFn<K, V, W>(windowCoder)));
  windowMapSizeMetadata.setCoder(ismCoder);

  // Set the coder on the metadata output destined to build the entry set and process the
  // entries, producing a [META, Window, Index] record per window key pair storing the key.
  PCollection<KV<Integer, KV<W, K>>> outputForEntrySet = outputTuple.get(outputForEntrySetTag);
  outputForEntrySet.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, inputCoder.getKeyCoder())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapKeysMetadata =
      outputForEntrySet
          .apply("GBKaSVForKeys", new GroupByKeyAndSortValuesOnly<>())
          .apply(
              ParDo.of(
                  new ToIsmMetadataRecordForKeyDoFn<K, V, W>(
                      inputCoder.getKeyCoder(), windowCoder)));
  windowMapKeysMetadata.setCoder(ismCoder);

  // Set that all these outputs should be materialized using an indexed format.
  runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows);
  runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata);
  runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);

  PCollectionList<IsmRecord<WindowedValue<V>>> outputs =
      PCollectionList.of(
          ImmutableList.of(
              perHashWithReifiedWindows, windowMapSizeMetadata, windowMapKeysMetadata));

  PCollection<IsmRecord<WindowedValue<V>>> flattenedOutputs =
      Pipeline.applyTransform(outputs, Flatten.pCollections());
  flattenedOutputs.apply(CreateDataflowView.forBatch(view));
  return flattenedOutputs;
}