org.apache.beam.sdk.transforms.DoFn.ProcessElement Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.DoFn.ProcessElement. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a stateful {@link DoFn} that, for every element, emits the difference between the
 * element's value and the value written to the {@code "last"} state by the previous call.
 *
 * <p>When no value has been stored yet, the previous value defaults to {@code value - 1}, so the
 * emitted difference is {@code 1}.
 */
private static DoFn<KV<String, Long>, Integer> timeSortedDoFn() {
  return new DoFn<KV<String, Long>, Integer>() {

    @StateId("last")
    private final StateSpec<ValueState<Long>> lastSpec = StateSpecs.value();

    @RequiresTimeSortedInput
    @ProcessElement
    public void process(
        @Element KV<String, Long> element,
        @StateId("last") ValueState<Long> last,
        OutputReceiver<Integer> output) {
      long current = element.getValue();
      Long previous = last.read();
      // No stored value yet: pretend the previous value was exactly one less.
      long baseline = (previous != null) ? previous : current - 1;
      last.write(current);
      output.output((int) (current - baseline));
    }
  };
}
 
Example #2
Source File: KafkaToBigQuery.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a failed element into a {@link TableRow} carrying the element timestamp, the error
 * message, the stack trace and a textual rendering of the original key/value payload.
 *
 * <p>The {@code payloadString} column is always set; a null key or value is rendered as an empty
 * string. Note that no separator is written between the key text and the {@code "value: "}
 * label.
 *
 * @param context the process context providing the failed element, its timestamp and the output
 */
@ProcessElement
public void processElement(ProcessContext context) {
  final FailsafeElement<KV<String, String>, String> failed = context.element();
  final KV<String, String> original = failed.getOriginalPayload();

  // Render the element timestamp in UTC for insertion.
  final String formattedTimestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  final TableRow row = new TableRow();
  row.set("timestamp", formattedTimestamp);
  row.set("errorMessage", failed.getErrorMessage());
  row.set("stacktrace", failed.getStacktrace());

  // Null key/value parts are replaced by empty strings.
  final String keyText = original.getKey() == null ? "" : original.getKey();
  final String valueText = original.getValue() == null ? "" : original.getValue();
  row.set("payloadString", "key: " + keyText + "value: " + valueText);

  context.output(row);
}
 
Example #3
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applying a DoFn that declares state to a plain {@code String} input is expected to fail with an
 * {@link IllegalArgumentException} whose message mentions both "state" and "KvCoder".
 */
@Test
public void testStateNotKeyed() {
  final String stateKey = "foo";

  // Register the expected failure up front; it is triggered by the apply() at the end.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("state");
  thrown.expectMessage("KvCoder");

  DoFn<String, Integer> statefulFn =
      new DoFn<String, Integer>() {

        @StateId(stateKey)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(stateKey) ValueState<Integer> state) {
          // Intentionally empty: only the DoFn's declaration matters for this test.
        }
      };

  pipeline
      .apply(Create.of("hello", "goodbye", "hello again"))
      .apply(ParDo.of(statefulFn));
}
 
Example #4
Source File: OpinionAnalysisPipeline.java    From dataflow-opinion-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Parses one raw input string into an {@code InputContent} and emits it.
 *
 * <p>Null input, input that is empty after trimming, and input for which
 * {@code InputContent.createInputContent} throws are logged at WARN level and dropped; nothing is
 * emitted for them. A null parse result is dropped silently.
 *
 * @param c the process context supplying the raw element and receiving the parsed output
 */
@ProcessElement
public void processElement(ProcessContext c) {
	String rawInput = c.element();
	if (rawInput == null) {
		LOG.warn("ProcessRawInput: null raw content");
		return;
	}

	String trimmed = rawInput.trim();
	if (trimmed.isEmpty()) {
		LOG.warn("ProcessRawInput: empty raw content or whitespace chars only");
		return;
	}

	InputContent iContent;
	try {
		iContent = InputContent.createInputContent(trimmed);
	} catch (Exception e) {
		// Best effort: a record that cannot be parsed is logged and skipped, not fatal.
		LOG.warn(e.getMessage());
		return;
	}

	if (iContent != null) {
		c.output(iContent);
	}
}
 
Example #5
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a multi-output {@code ParDo} exposes the wrapped DoFn's display data under the
 * {@code "fn"} namespace/key and reports the DoFn's class as the {@code "fn"} item.
 */
@Test
public void testWithOutputTagsDisplayData() {
  DoFn<String, String> fn =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          // No-op: only display data is under test.
        }

        @Override
        public void populateDisplayData(Builder builder) {
          builder.add(DisplayData.item("fnMetadata", "foobar"));
        }
      };

  TupleTag<String> mainTag = new TupleTag<>();
  ParDo.MultiOutput<String, String> multiOutput =
      ParDo.of(fn).withOutputTags(mainTag, TupleTagList.empty());

  DisplayData data = DisplayData.from(multiOutput);
  assertThat(data, includesDisplayDataFor("fn", fn));
  assertThat(data, hasDisplayItem("fn", fn.getClass()));
}
 
Example #6
Source File: IndexerPipeline.java    From dataflow-opinion-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Turns an {@code InputContent} into a single-cell mutation keyed by
 * {@code <jobName>#<expectedDocumentHash>} and emits it as a key/mutations pair.
 *
 * <p>The cell is written to the {@code DEAD_LETTER_TABLE_ERR_CF} family, column qualifier
 * {@code "text"}, with the content's text as its value.
 *
 * @param c the process context supplying the element, the pipeline options and the output
 */
@ProcessElement
public void processElement(ProcessContext c) {
	InputContent content = c.element();

	// Row key: job name and document hash joined by '#'.
	String rowKeyText = c.getPipelineOptions().getJobName() + "#" + content.expectedDocumentHash;
	ByteString rowkey = ByteString.copyFromUtf8(rowKeyText);
	ByteString value = ByteString.copyFromUtf8(content.text);

	Mutation.SetCell.Builder cell =
		Mutation.SetCell.newBuilder()
			.setFamilyName(IndexerPipelineUtils.DEAD_LETTER_TABLE_ERR_CF)
			.setColumnQualifier(ByteString.copyFromUtf8("text"))
			.setValue(value);
	Mutation mutation = Mutation.newBuilder().setSetCell(cell).build();

	Iterable<Mutation> mutations = ImmutableList.of(mutation);
	c.output(KV.of(rowkey, mutations));
}
 
Example #7
Source File: IndexerPipeline.java    From dataflow-opinion-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Parses one raw input string into an {@code InputContent} and emits it.
 *
 * <p>Null input, input that is empty after trimming, and input for which
 * {@code InputContent.createInputContent} throws are logged at WARN level and dropped; nothing is
 * emitted for them. A null parse result is dropped silently.
 *
 * @param c the process context supplying the raw element and receiving the parsed output
 */
@ProcessElement
public void processElement(ProcessContext c) {
	String rawInput = c.element();
	if (rawInput == null) {
		LOG.warn("ProcessRawInput: null raw content");
		return;
	}

	String trimmed = rawInput.trim();
	if (trimmed.isEmpty()) {
		LOG.warn("ProcessRawInput: empty raw content or whitespace chars only");
		return;
	}

	InputContent iContent;
	try {
		iContent = InputContent.createInputContent(trimmed);
	} catch (Exception e) {
		// Best effort: a record that cannot be parsed is logged and skipped, not fatal.
		LOG.warn(e.getMessage());
		return;
	}

	if (iContent != null) {
		c.output(iContent);
	}
}
 
Example #8
Source File: IndexerPipeline.java    From dataflow-opinion-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Splits a group of documents sharing one key into "index this" and "exact duplicate" outputs.
 *
 * <p>For each document in the group: if its {@code expectedDocumentHash} is present in the
 * {@code alreadyProcessedDocsSideInput} map it goes to
 * {@code PipelineTags.contentNotToIndexExactDupesTag}. Of the remaining documents, the first one
 * encountered is sent to the main output and every later one goes to the duplicates tag.
 *
 * @param c the process context supplying the grouped element, the side input and the outputs
 */
@ProcessElement
public void processElement(ProcessContext c) {
	Iterable<InputContent> dupes = c.element().getValue();

	// The side input does not change while iterating, so look it up once.
	Map<String, Long> alreadyProcessed = c.sideInput(alreadyProcessedDocsSideInput);

	boolean mainOutputEmitted = false;
	for (InputContent ic : dupes) {
		boolean previouslyProcessed = alreadyProcessed.get(ic.expectedDocumentHash) != null;
		if (!previouslyProcessed && !mainOutputEmitted) {
			mainOutputEmitted = true;
			c.output(ic);
		} else {
			c.output(PipelineTags.contentNotToIndexExactDupesTag, ic);
		}
	}
}
 
Example #9
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Binds a multimap view ({@code Map<Integer, Iterable<Integer>>}) to a {@code @SideInput}
 * parameter declared as {@code Map<Integer, Integer>} and expects an
 * {@link IllegalArgumentException}.
 */
@Test
@Category({NeedsRunner.class, UsesSideInputs.class})
public void testSideInputAnnotationFailedValidationMultiMapType() {

  final PCollectionView<Map<Integer, Iterable<Integer>>> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(KV.of(1, 2), KV.of(1, 3), KV.of(3, 4)))
          .apply("ViewSideInput1", View.asMultimap());

  // SideInput tag id
  final String sideInputTag1 = "tag1";

  DoFn<Integer, List<Integer>> fn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(@SideInput(sideInputTag1) Map<Integer, Integer> tag1) {}
      };

  thrown.expect(IllegalArgumentException.class);
  // The resulting PCollection is not needed; only the failure matters.
  pipeline
      .apply("Create main input", Create.of(2))
      .apply(ParDo.of(fn).withSideInput(sideInputTag1, sideInput1));
  pipeline.run();
}
 
Example #10
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Binds a {@code Map<Integer, Integer>} view to a {@code @SideInput} parameter declared as
 * {@code Map<String, String>} and expects an {@link IllegalArgumentException}.
 */
@Test
@Category({NeedsRunner.class, UsesSideInputs.class})
public void testSideInputAnnotationFailedValidationMapType() {

  final PCollectionView<Map<Integer, Integer>> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(KV.of(1, 2), KV.of(2, 3), KV.of(3, 4)))
          .apply("ViewSideInput1", View.asMap());

  // SideInput tag id
  final String sideInputTag1 = "tag1";

  DoFn<Integer, List<Integer>> fn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(@SideInput(sideInputTag1) Map<String, String> tag1) {}
      };

  thrown.expect(IllegalArgumentException.class);
  // The resulting PCollection is not needed; only the failure matters.
  pipeline
      .apply("Create main input", Create.of(2))
      .apply(ParDo.of(fn).withSideInput(sideInputTag1, sideInput1));
  pipeline.run();
}
 
Example #11
Source File: IndexerPipeline.java    From dataflow-opinion-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Parses one raw GDELT JSON string into an {@code InputContent} and emits it when its language
 * is {@code "EN"}.
 *
 * <p>Null or empty input, and input for which
 * {@code InputContent.createInputContentFromGDELTJson} throws, are logged at WARN level and
 * dropped. Content whose language is anything other than {@code "EN"} (including a missing
 * language) is dropped without logging.
 *
 * @param c the process context supplying the raw element and receiving the parsed output
 */
@ProcessElement
public void processElement(ProcessContext c) {
	String rawInput = c.element();
	// isEmpty() compares content; the previous `== ""` compared object identity only.
	if (rawInput == null || rawInput.isEmpty()) {
		LOG.warn("ParseGDELTJsonInput: null or empty raw content");
		return;
	}

	InputContent iContent;
	try {
		iContent = InputContent.createInputContentFromGDELTJson(rawInput);
	} catch (Exception e) {
		// Best effort: a record that cannot be parsed is logged and skipped, not fatal.
		LOG.warn(e.getMessage());
		return;
	}

	// Skip non-English content for now. Constant-first equals() keeps a null language from
	// throwing, which previously let such content slip through to the output.
	if (iContent != null && "EN".equals(iContent.language)) {
		c.output(iContent);
	}
}
 
Example #12
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Binds an {@code Iterable<Integer>} view to a {@code @SideInput} parameter declared as
 * {@code List<String>} and expects an {@link IllegalArgumentException}.
 */
@Test
@Category({NeedsRunner.class, UsesSideInputs.class})
public void testSideInputAnnotationFailedValidationIterableType() {

  final PCollectionView<Iterable<Integer>> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(2, 1, 0))
          .apply("ViewSideInput1", View.asIterable());

  // SideInput tag id
  final String sideInputTag1 = "tag1";

  DoFn<Integer, List<Integer>> fn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(@SideInput(sideInputTag1) List<String> tag1) {}
      };

  thrown.expect(IllegalArgumentException.class);
  // The resulting PCollection is not needed; only the failure matters.
  pipeline
      .apply("Create main input", Create.of(2))
      .apply(ParDo.of(fn).withSideInput(sideInputTag1, sideInput1));
  pipeline.run();
}
 
Example #13
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Binds a {@code List<Integer>} view to a {@code @SideInput} parameter declared as
 * {@code List<String>} and expects an {@link IllegalArgumentException}.
 */
@Test
@Category({NeedsRunner.class, UsesSideInputs.class})
public void testSideInputAnnotationFailedValidationListType() {

  final PCollectionView<List<Integer>> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(2, 1, 0))
          .apply("ViewSideInput1", View.asList());

  // SideInput tag id
  final String sideInputTag1 = "tag1";

  DoFn<Integer, List<Integer>> fn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(@SideInput(sideInputTag1) List<String> tag1) {}
      };

  thrown.expect(IllegalArgumentException.class);
  // The resulting PCollection is not needed; only the failure matters.
  pipeline
      .apply("Create main input", Create.of(2))
      .apply(ParDo.of(fn).withSideInput(sideInputTag1, sideInput1));
  pipeline.run();
}
 
Example #14
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Binds an {@code Integer} singleton view to a {@code @SideInput} parameter declared as
 * {@code String} and expects an {@link IllegalArgumentException}.
 */
@Test
@Category({NeedsRunner.class, UsesSideInputs.class})
public void testSideInputAnnotationFailedValidationSingletonType() {

  final PCollectionView<Integer> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(2))
          .apply("ViewSideInput1", View.asSingleton());

  // SideInput tag id
  final String sideInputTag1 = "tag1";

  DoFn<Integer, List<Integer>> fn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(@SideInput(sideInputTag1) String tag1) {}
      };

  thrown.expect(IllegalArgumentException.class);
  // The resulting PCollection is not needed; only the failure matters.
  pipeline
      .apply("Create main input", Create.of(2))
      .apply(ParDo.of(fn).withSideInput(sideInputTag1, sideInput1));
  pipeline.run();
}
 
Example #15
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Declares a {@code @SideInput} parameter on the DoFn but applies the {@code ParDo} without
 * registering any side input for that tag, and expects an {@link IllegalArgumentException}.
 */
@Test
@Category({NeedsRunner.class, UsesSideInputs.class})
public void testSideInputAnnotationFailedValidationMissing() {
  // SideInput tag id
  final String sideInputTag1 = "tag1";

  DoFn<Integer, List<Integer>> fn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(@SideInput(sideInputTag1) String tag1) {}
      };

  thrown.expect(IllegalArgumentException.class);
  // The resulting PCollection is not needed; only the failure matters.
  pipeline.apply("Create main input", Create.of(2)).apply(ParDo.of(fn));
  pipeline.run();
}
 
Example #16
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a {@code PipelineOptions} parameter of a {@code @ProcessElement} method reflects
 * the value set on the pipeline's options: the DoFn emits {@code MyOptions#getFakeOption()} and
 * the output is asserted to be the value assigned in this test.
 */
@Test
@Category(ValidatesRunner.class)
public void testPipelineOptionsParameter() {
  DoFn<Integer, String> emitFakeOption =
      new DoFn<Integer, String>() {
        @ProcessElement
        public void process(OutputReceiver<String> r, PipelineOptions options) {
          MyOptions myOptions = options.as(MyOptions.class);
          r.output(myOptions.getFakeOption());
        }
      };

  PCollection<String> results = pipeline.apply(Create.of(1)).apply(ParDo.of(emitFakeOption));

  // The option is set after the transform is applied but before the pipeline runs.
  String testOptionValue = "not fake anymore";
  pipeline.getOptions().as(MyOptions.class).setFakeOption(testOptionValue);
  PAssert.that(results).containsInAnyOrder(testOptionValue);

  pipeline.run();
}
 
Example #17
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies a DoFn whose {@code @ProcessElement} method takes an {@code IntervalWindow} to an input
 * created with {@code Create.of} and expects an {@link IllegalArgumentException} whose message
 * names both window classes and contains "window type" and "not a supertype".
 */
@Test
public void testRejectsWrongWindowType() {

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(GlobalWindow.class.getSimpleName());
  thrown.expectMessage(IntervalWindow.class.getSimpleName());
  thrown.expectMessage("window type");
  thrown.expectMessage("not a supertype");

  DoFn<Integer, Integer> intervalWindowFn =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void process(ProcessContext c, IntervalWindow w) {
          // No-op: the signature alone should trigger the rejection.
        }
      };

  pipeline.apply(Create.of(1, 2, 3)).apply(ParDo.of(intervalWindowFn));
}
 
Example #18
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a single-output {@code ParDo} exposes the wrapped DoFn's display data under the
 * {@code "fn"} namespace/key and reports the DoFn's class as the {@code "fn"} item.
 */
@Test
public void testDoFnWithContextDisplayData() {
  DoFn<String, String> fn =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          // No-op: only display data is under test.
        }

        @Override
        public void populateDisplayData(Builder builder) {
          builder.add(DisplayData.item("fnMetadata", "foobar"));
        }
      };

  SingleOutput<String, String> singleOutput = ParDo.of(fn);
  DisplayData data = DisplayData.from(singleOutput);

  assertThat(data, includesDisplayDataFor("fn", fn));
  assertThat(data, hasDisplayItem("fn", fn.getClass()));
}
 
Example #19
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code ParDo.of(fn)} registers an {@code "fn"} display item of type
 * {@code JAVA_CLASS} whose value is the DoFn's class name, and that the DoFn's own display data
 * is included under {@code "fn"}.
 */
@Test
public void testDoFnDisplayData() {
  DoFn<String, String> fn =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          // No-op: only display data is under test.
        }

        @Override
        public void populateDisplayData(Builder builder) {
          builder.add(DisplayData.item("doFnMetadata", "bar"));
        }
      };

  SingleOutput<String, String> singleOutput = ParDo.of(fn);
  DisplayData data = DisplayData.from(singleOutput);

  String expectedClassName = fn.getClass().getName();
  assertThat(
      data,
      hasDisplayItem(
          allOf(
              hasKey("fn"),
              hasType(DisplayData.Type.JAVA_CLASS),
              DisplayDataMatchers.hasValue(expectedClassName))));

  assertThat(data, includesDisplayDataFor("fn", fn));
}
 
Example #20
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies a stateful DoFn to input keyed by {@code Double} and expects an
 * {@link IllegalArgumentException} whose message mentions "state" and "deterministic".
 */
@Test
public void testStateNotDeterministic() {
  final String stateKey = "foo";

  // Register the expected failure up front; it is triggered by the apply() at the end.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("state");
  thrown.expectMessage("deterministic");

  // DoubleCoder is not deterministic, so this should crash
  DoFn<KV<Double, String>, Integer> statefulFn =
      new DoFn<KV<Double, String>, Integer>() {

        @StateId(stateKey)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(stateKey) ValueState<Integer> state) {
          // Intentionally empty: only the DoFn's declaration matters for this test.
        }
      };

  pipeline
      .apply(Create.of(KV.of(1.0, "hello"), KV.of(5.4, "goodbye"), KV.of(7.2, "hello again")))
      .apply(ParDo.of(statefulFn));
}
 
Example #21
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Emits every input integer to an additional output tag with a timestamp equal to the integer's
 * value, then formats the elements of that tagged output and asserts the resulting
 * "processing: N, timestamp: N" strings.
 */
@Test
@Category(NeedsRunner.class)
public void testParDoTaggedOutputWithTimestamp() {

  final TupleTag<Integer> mainOutputTag = new TupleTag<Integer>("main") {};
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("additional") {};

  // Writes only to the additional tag, stamping each element with its own value as the instant.
  DoFn<Integer, Integer> stampWithOwnValue =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(@Element Integer element, MultiOutputReceiver r) {
          Instant stamp = new Instant(element.longValue());
          r.get(additionalOutputTag).outputWithTimestamp(element, stamp);
        }
      };

  PCollection<String> output =
      pipeline
          .apply(Create.of(Arrays.asList(3, 42, 6)))
          .apply(
              ParDo.of(stampWithOwnValue)
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)))
          .get(additionalOutputTag)
          .apply(ParDo.of(new TestShiftTimestampDoFn<>(Duration.ZERO, Duration.ZERO)))
          .apply(ParDo.of(new TestFormatTimestampDoFn<>()));

  PAssert.that(output)
      .containsInAnyOrder(
          "processing: 3, timestamp: 3",
          "processing: 42, timestamp: 42",
          "processing: 6, timestamp: 6");

  pipeline.run();
}
 
Example #22
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a stateful DoFn's state declarations appear in the {@code ParDo}'s display data as
 * {@code state_int} and {@code state_map} string items with the expected coder descriptions and
 * labels.
 */
@Test
public void testStatefulDoFnDisplayData() {
  DoFn<KV<String, String>, String> fn =
      new DoFn<KV<String, String>, String>() {
        @StateId("int")
        final StateSpec<ValueState<Integer>> intState = StateSpecs.value();

        @StateId("map")
        final StateSpec<MapState<String, SerializableClass>> mapState = StateSpecs.map();

        @ProcessElement
        public void processElement(ProcessContext c) {
          // No-op: only display data is under test.
        }
      };

  SingleOutput<KV<String, String>, String> parDo = ParDo.of(fn);

  // Use the parDo in a pipeline to cause state coders to be inferred.
  pipeline.apply(Create.of(KV.of("input", "value"))).apply(parDo);

  String expectedMapStateValue =
      "MapState<StringUtf8Coder, "
          + "SerializableCoder(org.apache.beam.sdk.transforms.ParDoTest"
          + "$BasicTests$SerializableClass)>";

  DisplayData data = DisplayData.from(parDo);
  assertThat(
      data,
      hasDisplayItem(
          allOf(
              hasKey("state_int"),
              hasType(DisplayData.Type.STRING),
              hasValue("ValueState<VarIntCoder>"),
              hasLabel("State \"int\""))));
  assertThat(
      data,
      hasDisplayItem(
          allOf(
              hasKey("state_map"),
              hasType(DisplayData.Type.STRING),
              hasValue(expectedMapStateValue),
              hasLabel("State \"map\""))));
}
 
Example #23
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a DoFn that writes every input only to an additional output tag, then asserts that the
 * main output is empty and the additional output contains exactly the inputs.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoWithOnlyTaggedOutput() {

  List<Integer> inputs = Arrays.asList(3, -42, 666);

  final TupleTag<Void> mainOutputTag = new TupleTag<Void>("main") {};
  final TupleTag<Integer> additionalOutputTag = new TupleTag<Integer>("additional") {};

  // Never touches the main output; everything is routed to the additional tag.
  DoFn<Integer, Void> routeToAdditional =
      new DoFn<Integer, Void>() {
        @ProcessElement
        public void processElement(@Element Integer element, MultiOutputReceiver r) {
          r.get(additionalOutputTag).output(element);
        }
      };

  PCollectionTuple outputs =
      pipeline
          .apply(Create.of(inputs))
          .apply(
              ParDo.of(routeToAdditional)
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));

  PAssert.that(outputs.get(mainOutputTag)).empty();
  PAssert.that(outputs.get(additionalOutputTag)).containsInAnyOrder(inputs);

  pipeline.run();
}
 
Example #24
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the same multi-output {@code ParDo} instance twice to one input and asserts that the
 * two resulting tuples are not equal while both expose the same three output tags.
 */
@Test
public void testMultiOutputAppliedMultipleTimesDifferentOutputs() {
  pipeline.enableAbandonedNodeEnforcement(false);
  PCollection<Long> longs = pipeline.apply(GenerateSequence.from(0));

  TupleTag<Long> mainOut = new TupleTag<>();
  final TupleTag<String> valueAsString = new TupleTag<>();
  final TupleTag<Integer> valueAsInt = new TupleTag<>();

  // Emits each element to the main output and, converted, to both additional tags.
  DoFn<Long, Long> fn =
      new DoFn<Long, Long>() {
        @ProcessElement
        public void processElement(ProcessContext cxt, @Element Long element) {
          cxt.output(element);
          cxt.output(valueAsString, Long.toString(element));
          cxt.output(valueAsInt, element.intValue());
        }
      };

  TupleTagList additionalTags = TupleTagList.of(valueAsString).and(valueAsInt);
  ParDo.MultiOutput<Long, Long> parDo = ParDo.of(fn).withOutputTags(mainOut, additionalTags);

  PCollectionTuple firstApplication = longs.apply("first", parDo);
  PCollectionTuple secondApplication = longs.apply("second", parDo);

  assertThat(firstApplication, not(equalTo(secondApplication)));
  assertThat(
      firstApplication.getAll().keySet(),
      Matchers.containsInAnyOrder(mainOut, valueAsString, valueAsInt));
  assertThat(
      secondApplication.getAll().keySet(),
      Matchers.containsInAnyOrder(mainOut, valueAsString, valueAsInt));
}
 
Example #25
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Windows a single timestamped element into 1 ms fixed windows and runs a DoFn that forwards the
 * element and, in {@code @FinishBundle}, emits an extra {@code "finish"} element at instant 3 in
 * the window assigned for that instant. The result is validated by {@code Checker}.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowingInStartAndFinishBundle() {

  final FixedWindows windowFn = FixedWindows.of(Duration.millis(1));

  DoFn<String, String> forwardAndFinish =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(
            @Element String element, @Timestamp Instant timestamp, OutputReceiver<String> r) {
          r.output(element);
          System.out.println("Process: " + element + ":" + timestamp.getMillis());
        }

        @FinishBundle
        public void finishBundle(FinishBundleContext c) {
          Instant finishInstant = new Instant(3);
          c.output("finish", finishInstant, windowFn.assignWindow(finishInstant));
          System.out.println("Finish: 3");
        }
      };

  PCollection<String> output =
      pipeline
          .apply(Create.timestamped(TimestampedValue.of("elem", new Instant(1))))
          .apply(Window.into(windowFn))
          .apply(ParDo.of(forwardAndFinish))
          .apply(ParDo.of(new PrintingDoFn()));

  PAssert.that(output).satisfies(new Checker());

  pipeline.run();
}
 
Example #26
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes an arbitrary DoFn to bytes, converts the bytes to a JSON string and back, and
 * asserts the round trip reproduces the original bytes.
 */
@Test
public void testJsonEscaping() {
  // Declare an arbitrary function and make sure we can serialize it
  DoFn<Integer, Integer> incrementFn =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(@Element Integer element, OutputReceiver<Integer> r) {
          r.output(element + 1);
        }
      };

  byte[] original = serializeToByteArray(incrementFn);
  String asJson = byteArrayToJsonString(original);
  byte[] roundTripped = jsonStringToByteArray(asJson);

  assertArrayEquals(original, roundTripped);
}
 
Example #27
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards the element to the output only when it is an exact multiple of {@code divisor}.
 *
 * @param element the input integer
 * @param r receiver for elements that pass the filter
 */
@ProcessElement
public void processElement(@Element Integer element, OutputReceiver<Integer> r)
    throws Exception {
  boolean divisible = element % divisor == 0;
  if (!divisible) {
    return;
  }
  r.output(element);
}
 
Example #28
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the side-input integer in a {@code MyInteger}, appends it to the bag state, and emits the
 * side-input integer itself.
 *
 * @param c the process context used for output
 * @param sideInputTag the value supplied by the side input
 * @param state the bag state that accumulates wrapped side-input values
 */
@ProcessElement
public void processElem(
    ProcessContext c,
    @SideInput(sideInputTag1) Integer sideInputTag,
    @StateId(stateId) BagState<MyInteger> state) {
  MyInteger wrapped = new MyInteger(sideInputTag);
  state.add(wrapped);
  c.output(sideInputTag);
}
 
Example #29
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a stateful DoFn whose {@code ValueState<MyInteger>} has no explicit coder, over input
 * created with an explicit {@code KvCoder} using {@code MyIntegerCoder}. The DoFn emits the
 * stored value (or zero when nothing is stored) and then stores that value plus one.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  DataflowPortabilityApiUnsupported.class
})
public void testValueStateCoderInferenceFromInputCoder() {
  final String stateId = "foo";
  MyIntegerCoder coder = MyIntegerCoder.of();

  DoFn<KV<String, MyInteger>, MyInteger> countingFn =
      new DoFn<KV<String, MyInteger>, MyInteger>() {

        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            @StateId(stateId) ValueState<MyInteger> state, OutputReceiver<MyInteger> r) {
          MyInteger currentValue = MoreObjects.firstNonNull(state.read(), new MyInteger(0));
          r.output(currentValue);
          int next = currentValue.getValue() + 1;
          state.write(new MyInteger(next));
        }
      };

  pipeline
      .apply(
          Create.of(
                  KV.of("hello", new MyInteger(42)),
                  KV.of("hello", new MyInteger(97)),
                  KV.of("hello", new MyInteger(84)))
              .withCoder(KvCoder.of(StringUtf8Coder.of(), coder)))
      .apply(ParDo.of(countingFn))
      .setCoder(coder);

  pipeline.run();
}
 
Example #30
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a stateful DoFn whose {@code BagState<MyInteger>} has no explicit coder over
 * {@code KV<String, Integer>} input and expects a {@link RuntimeException} with the message
 * "Unable to infer a coder for BagState and no Coder was specified.".
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testBagStateCoderInferenceFailure() throws Exception {
  final String stateId = "foo";
  Coder<MyInteger> elementCoder = MyIntegerCoder.of();

  DoFn<KV<String, Integer>, List<MyInteger>> bufferingFn =
      new DoFn<KV<String, Integer>, List<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<BagState<MyInteger>> bufferState = StateSpecs.bag();

        @ProcessElement
        public void processElement(
            @Element KV<String, Integer> element,
            @StateId(stateId) BagState<MyInteger> state,
            OutputReceiver<List<MyInteger>> r) {
          state.add(new MyInteger(element.getValue()));
          Iterable<MyInteger> buffered = state.read();
          // Emit only once at least four values have been buffered.
          if (Iterables.size(buffered) < 4) {
            return;
          }
          List<MyInteger> sorted = Lists.newArrayList(buffered);
          Collections.sort(sorted);
          r.output(sorted);
        }
      };

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for BagState and no Coder was specified.");

  pipeline
      .apply(
          Create.of(
              KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 84), KV.of("hello", 12)))
      .apply(ParDo.of(bufferingFn))
      .setCoder(ListCoder.of(elementCoder));

  pipeline.run();
}