org.apache.beam.sdk.values.PCollection Java Examples
The following examples show how to use
org.apache.beam.sdk.values.PCollection.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FlatMapElementsTest.java From beam with Apache License 2.0 | 7 votes |
/** * Basic test of {@link FlatMapElements} with a lambda (which is instantiated as a {@link * ProcessFunction}). */ @Test @Category(NeedsRunner.class) public void testFlatMapBasicWithLambda() throws Exception { PCollection<Integer> output = pipeline .apply(Create.of(1, 2, 3)) .apply( FlatMapElements // Note that the input type annotation is required. .into(TypeDescriptors.integers()) .via((Integer i) -> ImmutableList.of(i, -i))); PAssert.that(output).containsInAnyOrder(1, 3, -1, -3, 2, -2); pipeline.run(); }
Example #2
Source File: EvaluationContext.java From beam with Apache License 2.0 | 6 votes |
/** * Add output of transform to context map and possibly cache if it conforms {@link * #shouldCache(PTransform, PValue)}. * * @param transform from which Dataset was created * @param pvalue output of transform * @param dataset created Dataset from transform */ private void putDataset( @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) { try { dataset.setName(pvalue.getName()); } catch (IllegalStateException e) { // name not set, ignore } if (shouldCache(transform, pvalue)) { // we cache only PCollection Coder<?> coder = ((PCollection<?>) pvalue).getCoder(); Coder<? extends BoundedWindow> wCoder = ((PCollection<?>) pvalue).getWindowingStrategy().getWindowFn().windowCoder(); dataset.cache(storageLevel(), WindowedValue.getFullCoder(coder, wCoder)); } datasets.put(pvalue, dataset); leaves.add(dataset); }
Example #3
Source File: TestBoundedTable.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds the writer for this table: each incoming row is added to {@code CONTENT}, and {@code
 * CONTENT} is cleared in the DoFn's teardown method.
 *
 * @param input rows to write
 * @return a {@link PDone} bound to the input's pipeline
 */
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  DoFn<Row, Void> collectRows =
      new DoFn<Row, Void>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          CONTENT.add(c.element());
        }

        @Teardown
        public void close() {
          CONTENT.clear();
        }
      };

  input.apply(ParDo.of(collectRows));
  return PDone.in(input.getPipeline());
}
Example #4
Source File: NativeWrappedIOTest.java From component-runtime with Apache License 2.0 | 6 votes |
/**
 * Creates the {@code beamtest#source} mapper component, applies it to the pipeline as a
 * transform, and checks that the first emitted JSON object has the id {@code "test"}.
 */
@Test
public void source() {
    final String plugin = COMPONENTS.getTestPlugins().iterator().next();

    final PTransform<PBegin, PCollection<JsonObject>> jdbc =
            PTransform.class
                    .cast(
                            COMPONENTS
                                    .asManager()
                                    .createComponent(
                                            "beamtest",
                                            "source",
                                            ComponentManager.ComponentType.MAPPER,
                                            1,
                                            emptyMap())
                                    .orElseThrow(
                                            () -> new IllegalArgumentException("no beamtest#source component")));

    final PCollection<JsonObject> records =
            pipeline.apply(jdbc).setCoder(JsonpJsonObjectCoder.of(plugin));

    PAssert
            .that(records)
            .satisfies((SerializableFunction<Iterable<JsonObject>, Void>) objects -> {
                assertEquals("test", objects.iterator().next().getString("id"));
                return null;
            });

    pipeline.run().waitUntilFinish();
}
Example #5
Source File: SampleTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies {@code Sample.any(0)} to six timestamped integers in three-second fixed windows and
 * verifies, for the windows starting at 0 ms and at 3000 ms, that the output satisfies {@code
 * VerifyCorrectSample<>(0, EMPTY)}.
 */
@Test
@Category(NeedsRunner.class)
public void testSampleAnyZero() {
  PCollection<Integer> timestamped =
      pipeline.apply(
          Create.timestamped(ImmutableList.of(tv(0), tv(1), tv(2), tv(3), tv(4), tv(5)))
              .withCoder(BigEndianIntegerCoder.of()));

  PCollection<Integer> windowed =
      timestamped.apply(Window.into(FixedWindows.of(Duration.standardSeconds(3))));
  PCollection<Integer> sampled = windowed.apply(Sample.any(0));

  PAssert.that(sampled)
      .inWindow(new IntervalWindow(new Instant(0), Duration.standardSeconds(3)))
      .satisfies(new VerifyCorrectSample<>(0, EMPTY));

  PAssert.that(sampled)
      .inWindow(new IntervalWindow(new Instant(3000), Duration.standardSeconds(3)))
      .satisfies(new VerifyCorrectSample<>(0, EMPTY));

  pipeline.run();
}
Example #6
Source File: PTransformTranslationTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void toAndFromProto() throws IOException { SdkComponents components = SdkComponents.create(spec.getTransform().getPipeline().getOptions()); RunnerApi.PTransform converted = convert(spec, components); Components protoComponents = components.toComponents(); // Sanity checks assertThat(converted.getInputsCount(), equalTo(spec.getTransform().getInputs().size())); assertThat(converted.getOutputsCount(), equalTo(spec.getTransform().getOutputs().size())); assertThat(converted.getSubtransformsCount(), equalTo(spec.getChildren().size())); assertThat(converted.getUniqueName(), equalTo(spec.getTransform().getFullName())); for (PValue inputValue : spec.getTransform().getInputs().values()) { PCollection<?> inputPc = (PCollection<?>) inputValue; protoComponents.getPcollectionsOrThrow(components.registerPCollection(inputPc)); } for (PValue outputValue : spec.getTransform().getOutputs().values()) { PCollection<?> outputPc = (PCollection<?>) outputValue; protoComponents.getPcollectionsOrThrow(components.registerPCollection(outputPc)); } }
Example #7
Source File: ForwardingPTransformTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Stubs the delegate to return a mocked coder from {@code getDefaultOutputCoder} and asserts
 * that the collection produced by {@code forwarding.expand(input)} reports that same coder.
 */
@Test
public void getDefaultOutputCoderDelegates() throws Exception {
  @SuppressWarnings("unchecked")
  PCollection<Integer> source =
      PCollection.createPrimitiveOutputInternal(
          null /* pipeline */,
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED,
          null /* coder */);

  @SuppressWarnings("unchecked")
  PCollection<String> expanded =
      PCollection.createPrimitiveOutputInternal(
          null /* pipeline */,
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED,
          null /* coder */);

  @SuppressWarnings("unchecked")
  Coder<String> mockedCoder = mock(Coder.class);

  when(delegate.expand(source)).thenReturn(expanded);
  when(delegate.getDefaultOutputCoder(source, expanded)).thenReturn(mockedCoder);

  assertThat(forwarding.expand(source).getCoder(), equalTo(mockedCoder));
}
Example #8
Source File: BeamSalUhfSpecialTypeAndValueTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs an {@code IS_INF} query over two float and two double columns of {@code
 * boundedInputFloatDouble} and expects a single row of four {@code true} values.
 */
@Test
public void testIsInf() throws Exception {
  Schema expectedSchema =
      Schema.builder()
          .addBooleanField("field_1")
          .addBooleanField("field_2")
          .addBooleanField("field_3")
          .addBooleanField("field_4")
          .build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues(true, true, true, true).build();

  String query =
      "SELECT IS_INF(f_float_1), IS_INF(f_double_1), IS_INF(f_float_2), IS_INF(f_double_2) FROM PCOLLECTION";
  PCollection<Row> rows = boundedInputFloatDouble.apply("testUdf", SqlTransform.query(query));

  PAssert.that(rows).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish();
}
Example #9
Source File: OuterRightJoinTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Right outer join where the right-side key {@code "Key3"} has no match on the left: the result
 * is expected to pair it with the supplied left null value {@code -1L}.
 */
@Test
public void testJoinNoneToOneMapping() {
  leftListOfKv.add(KV.of("Key2", 4L));
  PCollection<KV<String, Long>> left = p.apply("CreateLeft", Create.of(leftListOfKv));

  rightListOfKv.add(KV.of("Key3", "bar"));
  PCollection<KV<String, String>> right = p.apply("CreateRight", Create.of(rightListOfKv));

  PCollection<KV<String, KV<Long, String>>> joined = Join.rightOuterJoin(left, right, -1L);

  expectedResult.add(KV.of("Key3", KV.of(-1L, "bar")));
  PAssert.that(joined).containsInAnyOrder(expectedResult);

  p.run();
}
Example #10
Source File: JoinTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void testBuild_ImplicitName() { final Pipeline pipeline = TestUtils.createTestPipeline(); final PCollection<String> left = TestUtils.createMockDataset(pipeline, TypeDescriptors.strings()); final PCollection<String> right = TestUtils.createMockDataset(pipeline, TypeDescriptors.strings()); final PCollection<KV<Integer, String>> joined = Join.of(left, right) .by(String::length, String::length) .using( (String l, String r, Collector<String> c) -> { // no-op }) .output(); final Join join = (Join) TestUtils.getProducer(joined); assertFalse(join.getName().isPresent()); }
Example #11
Source File: FlattenTranslatorBatch.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates a flatten: unions the datasets of all input PCollections (or uses an empty dataset
 * when there are no inputs) and stores the result as the dataset of the transform's output.
 *
 * @param transform the flatten transform being translated
 * @param context translation context supplying the inputs, their datasets and the output
 */
@Override
public void translateTransform(
    PTransform<PCollectionList<T>, PCollection<T>> transform, TranslationContext context) {
  Collection<PValue> inputs = context.getInputs().values();
  Dataset<WindowedValue<T>> unioned = null;

  if (inputs.isEmpty()) {
    unioned = context.emptyDataset();
  } else {
    for (PValue input : inputs) {
      checkArgument(
          input instanceof PCollection,
          "Got non-PCollection input to flatten: %s of type %s",
          input,
          input.getClass().getSimpleName());
      @SuppressWarnings("unchecked")
      PCollection<T> collection = (PCollection<T>) input;
      Dataset<WindowedValue<T>> next = context.getDataset(collection);
      // The first dataset seeds the result; every later one is unioned onto it.
      unioned = (unioned == null) ? next : unioned.union(next);
    }
  }

  context.putDataset(context.getOutput(), unioned);
}
Example #12
Source File: ReadEvaluatorFactory.java From beam with Apache License 2.0 | 6 votes |
@Override public Collection<CommittedBundle<SourceShard<T>>> getInitialInputs( AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> appliedTransform, int targetParallelism) throws Exception { switch (ReadTranslation.sourceIsBounded(appliedTransform)) { case BOUNDED: // This cast could be made unnecessary, but too much bounded polymorphism return (Collection) boundedInputProvider.getInitialInputs(appliedTransform, targetParallelism); case UNBOUNDED: // This cast could be made unnecessary, but too much bounded polymorphism return (Collection) unboundedInputProvider.getInitialInputs(appliedTransform, targetParallelism); default: throw new IllegalArgumentException("PCollection is neither bounded nor unbounded?!?"); } }
Example #13
Source File: SimpleFileIOInputRuntimeTest.java From components with Apache License 2.0 | 6 votes |
/** * Basic unit test using all default values (except for the path) on an in-memory DFS cluster. */ @Test public void testBasicAvro() throws IOException, URISyntaxException { RecordSet rs = getSimpleTestData(0); writeRandomAvroFile(mini.getFs(), "/user/test/input.avro", rs); String fileSpec = mini.getFs().getUri().resolve("/user/test/input.avro").toString(); // Configure the component. SimpleFileIOInputProperties inputProps = createInputComponentProperties(); inputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO); inputProps.getDatasetProperties().path.setValue(fileSpec); // Create the runtime. SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime(); runtime.initialize(null, inputProps); // Use the runtime in a direct pipeline to test. final Pipeline p = beam.createPipeline(); PCollection<IndexedRecord> readLines = p.apply(runtime); // Check the expected values. PAssert.that(readLines).containsInAnyOrder(rs.getAllData()); // And run the test. p.run().waitUntilFinish(); }
Example #14
Source File: ViewTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Uses a singleton view holding 47 as a side input; each of the three main-input elements
 * outputs the side-input value, so the result is expected to be three 47s.
 */
@Test
@Category(ValidatesRunner.class)
public void testSingletonSideInput() {
  final PCollectionView<Integer> singleton =
      pipeline.apply("Create47", Create.of(47)).apply(View.asSingleton());

  PCollection<Integer> mainInput = pipeline.apply("Create123", Create.of(1, 2, 3));

  PCollection<Integer> sideValues =
      mainInput.apply(
          "OutputSideInputs",
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(c.sideInput(singleton));
                    }
                  })
              .withSideInputs(singleton));

  PAssert.that(sideValues).containsInAnyOrder(47, 47, 47);

  pipeline.run();
}
Example #15
Source File: PubsubIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads two payload-only Pub/Sub messages through {@code readMessagesWithCoderAndParseFn} using
 * a string coder and {@code StringPayloadParseFn}, expecting the strings "foo" and "bar".
 */
@Test
public void testReadMessagesWithCoderAndParseFn() {
  Coder<PubsubMessage> messageCoder = PubsubMessagePayloadOnlyCoder.of();
  List<PubsubMessage> messages =
      ImmutableList.of(
          new PubsubMessage("foo".getBytes(StandardCharsets.UTF_8), new HashMap<>()),
          new PubsubMessage("bar".getBytes(StandardCharsets.UTF_8), new HashMap<>()));
  setupTestClient(messages, messageCoder);

  PCollection<String> payloads =
      readPipeline.apply(
          PubsubIO.readMessagesWithCoderAndParseFn(
                  StringUtf8Coder.of(), new StringPayloadParseFn())
              .fromSubscription(SUBSCRIPTION.getPath())
              .withClock(CLOCK)
              .withClientFactory(clientFactory));

  List<String> expected = ImmutableList.of("foo", "bar");
  PAssert.that(payloads).containsInAnyOrder(expected);

  readPipeline.run();
}
Example #16
Source File: JacksonTransformsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Parses {@code EXTRA_PROPERTIES_JSONS} into {@code MyPojo} with the default {@code ParseJsons}
 * configuration; running the pipeline is expected to throw a {@code PipelineExecutionException}.
 */
@Test(expected = Pipeline.PipelineExecutionException.class)
public void failParsingWithoutCustomMapper() {
  PCollection<MyPojo> parsed =
      pipeline
          .apply(Create.of(EXTRA_PROPERTIES_JSONS))
          .apply(ParseJsons.of(MyPojo.class))
          .setCoder(SerializableCoder.of(MyPojo.class));

  PAssert.that(parsed).empty();

  pipeline.run();
}
Example #17
Source File: SplittableDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs the DoFn from {@code sdfWithSideInput} over eight integers in 2 ms fixed windows with a
 * singleton side input in 4 ms fixed windows ("a" at 0, "b" at 4), and asserts the outputs
 * "a:0" through "a:3" and "b:4" through "b:7".
 *
 * @param bounded boundedness passed to {@code sdfWithSideInput}
 */
private void testWindowedSideInput(IsBounded bounded) {
  PCollection<Integer> numbers =
      p.apply(
              "main",
              Create.timestamped(
                  TimestampedValue.of(0, new Instant(0)),
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(2)),
                  TimestampedValue.of(3, new Instant(3)),
                  TimestampedValue.of(4, new Instant(4)),
                  TimestampedValue.of(5, new Instant(5)),
                  TimestampedValue.of(6, new Instant(6)),
                  TimestampedValue.of(7, new Instant(7))))
          .apply("window 2", Window.into(FixedWindows.of(Duration.millis(2))));

  PCollectionView<String> letterView =
      p.apply(
              "side",
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(0)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply("window 4", Window.into(FixedWindows.of(Duration.millis(4))))
          .apply("singleton", View.asSingleton());

  PCollection<String> paired =
      numbers.apply(
          ParDo.of(sdfWithSideInput(bounded, letterView)).withSideInputs(letterView));

  PAssert.that(paired)
      .containsInAnyOrder("a:0", "a:1", "a:2", "a:3", "b:4", "b:5", "b:6", "b:7");

  p.run();
}
Example #18
Source File: TaskTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Feeds the values 10, 20, 30, 40 and 50 through {@code Task.applyTransform} and expects a
 * single result of 150.
 */
@Test
public void combine_binaryCombineFn_lambda() {
  Create.Values<BigInteger> input =
      Create.of(
          BigInteger.valueOf(10),
          BigInteger.valueOf(20),
          BigInteger.valueOf(30),
          BigInteger.valueOf(40),
          BigInteger.valueOf(50));

  PCollection<BigInteger> source = testPipeline.apply(input);
  PCollection<BigInteger> combined = Task.applyTransform(source);

  PAssert.that(combined).containsInAnyOrder(BigInteger.valueOf(150));

  testPipeline.run().waitUntilFinish();
}
Example #19
Source File: ViewTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Main input in 10 ms fixed windows reads a side input that sums 1, 2 and 3 in the global
 * window; each main element is concatenated with the side-input value, so "A6", "B6" and "C6"
 * are expected.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToGlobal() {
  final PCollectionView<Integer> sumView =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(11)),
                  TimestampedValue.of(3, new Instant(13))))
          .apply("WindowSideInput", Window.into(new GlobalWindows()))
          .apply(Sum.integersGlobally())
          .apply(View.asSingleton());

  PCollection<String> letters =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("A", new Instant(4)),
                  TimestampedValue.of("B", new Instant(15)),
                  TimestampedValue.of("C", new Instant(7))))
          .apply("WindowMainInput", Window.into(FixedWindows.of(Duration.millis(10))));

  PCollection<String> lettersWithSum =
      letters.apply(
          "OutputMainAndSideInputs",
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(c.element() + c.sideInput(sumView));
                    }
                  })
              .withSideInputs(sumView));

  PAssert.that(lettersWithSum).containsInAnyOrder("A6", "B6", "C6");

  pipeline.run();
}
Example #20
Source File: HadoopFormatIOReadTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads through {@code HadoopFormatIO} with the configuration from {@code serConf} and expects
 * the records returned by {@code TestEmployeeDataSet.getEmployeeData()}.
 */
@Test
public void testReadingData() {
  HadoopFormatIO.Read<Text, Employee> readTransform =
      HadoopFormatIO.<Text, Employee>read().withConfiguration(serConf.get());
  List<KV<Text, Employee>> expectedEmployees = TestEmployeeDataSet.getEmployeeData();

  PCollection<KV<Text, Employee>> employees = p.apply("ReadTest", readTransform);

  PAssert.that(employees).containsInAnyOrder(expectedEmployees);
  p.run();
}
Example #21
Source File: HllCountTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies {@code HllCount.Extract.globally()} to {@code EMPTY_SKETCH} and expects the single
 * result {@code 0L}.
 */
@Test
@Category(NeedsRunner.class)
public void testExtractGloballyForEmptySketch() {
  PCollection<Long> estimate =
      p.apply(Create.of(EMPTY_SKETCH)).apply(HllCount.Extract.globally());

  PAssert.thatSingleton(estimate).isEqualTo(0L);

  p.run();
}
Example #22
Source File: JacksonTransformsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Parses a mix of valid and invalid JSON strings using {@code exceptionsVia()} with no
 * arguments; the main output is expected to contain {@code POJOS}, and the failures are checked
 * by {@code assertParsingWithErrorMapHandler}.
 */
@Test
public void testParsingInvalidJsonsWithFailuresDefaultHandler() {
  WithFailures.Result<PCollection<MyPojo>, KV<String, Map<String, String>>> parsed =
      pipeline
          .apply(Create.of(Iterables.concat(VALID_JSONS, INVALID_JSONS)))
          .apply(ParseJsons.of(MyPojo.class).exceptionsVia());

  parsed.output().setCoder(SerializableCoder.of(MyPojo.class));

  PAssert.that(parsed.output()).containsInAnyOrder(POJOS);
  assertParsingWithErrorMapHandler(parsed);

  pipeline.run();
}
Example #23
Source File: SolrIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Inserts {@code NUM_DOCS} test documents into the Solr collection, reads the collection with a
 * batch size of 101, and expects the global count of documents read to equal {@code NUM_DOCS}.
 */
@Test
public void testRead() throws Exception {
  SolrIOTestUtils.insertTestDocuments(SOLR_COLLECTION, NUM_DOCS, solrClient);

  PCollection<SolrDocument> documents =
      pipeline.apply(
          SolrIO.read()
              .withConnectionConfiguration(connectionConfiguration)
              .from(SOLR_COLLECTION)
              .withBatchSize(101));

  PCollection<Long> documentCount = documents.apply("Count", Count.globally());
  PAssert.thatSingleton(documentCount).isEqualTo(NUM_DOCS);

  pipeline.run();
}
Example #24
Source File: ParDoSchemaTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Selects three fields of {@code ForExtraction} through {@code @FieldAccess} descriptors and
 * receives them as a {@code String}, an {@code int} and an {@code int[]}; the formatted outputs
 * are expected to be "a:1:[1, 2]", "b:2:[2, 3]" and "c:3:[3, 4]".
 */
@Test
@Category(ValidatesRunner.class)
public void testSchemaFieldDescriptorSelectionUnboxing() {
  List<ForExtraction> inputs =
      Lists.newArrayList(
          new AutoValue_ParDoSchemaTest_ForExtraction(1, "a", Lists.newArrayList(1, 2)),
          new AutoValue_ParDoSchemaTest_ForExtraction(2, "b", Lists.newArrayList(2, 3)),
          new AutoValue_ParDoSchemaTest_ForExtraction(3, "c", Lists.newArrayList(3, 4)));

  PCollection<String> formatted =
      pipeline
          .apply(Create.of(inputs))
          .apply(
              ParDo.of(
                  new DoFn<ForExtraction, String>() {
                    @FieldAccess("stringSelector")
                    final FieldAccessDescriptor stringSelector =
                        FieldAccessDescriptor.withFieldNames("stringField");

                    @FieldAccess("intSelector")
                    final FieldAccessDescriptor intSelector =
                        FieldAccessDescriptor.withFieldNames("integerField");

                    @FieldAccess("intsSelector")
                    final FieldAccessDescriptor intsSelector =
                        FieldAccessDescriptor.withFieldNames("ints");

                    @ProcessElement
                    public void process(
                        @FieldAccess("stringSelector") String stringField,
                        @FieldAccess("intSelector") int integerField,
                        @FieldAccess("intsSelector") int[] intArray,
                        OutputReceiver<String> r) {
                      r.output(
                          stringField + ":" + integerField + ":" + Arrays.toString(intArray));
                    }
                  }));

  PAssert.that(formatted).containsInAnyOrder("a:1:[1, 2]", "b:2:[2, 3]", "c:3:[3, 4]");

  pipeline.run();
}
Example #25
Source File: VerifyBamIdTest.java From dataflow-java with Apache License 2.0 | 5 votes |
/**
 * Combines one three-base read ("ATG" at position 123 on reference "1", qualities 3, 4 and 5)
 * with the reference counts via {@code VerifyBamId.combineReads} and checks the resulting
 * per-position {@code ReadCounts} for {@code position1}, {@code position2} and {@code position3}.
 */
@Test
public void testCombineReads() throws Exception {
  PCollection<KV<Position, AlleleFreq>> referenceCounts =
      p.apply("createInput", Create.of(refCountList));
  PAssert.that(referenceCounts).containsInAnyOrder(refCountList);

  Read alignedRead =
      Read.newBuilder()
          .setProperPlacement(true)
          .setAlignment(
              LinearAlignment.newBuilder()
                  .setPosition(
                      com.google.genomics.v1.Position.newBuilder()
                          .setReferenceName("1")
                          .setPosition(123))
                  .addCigar(
                      CigarUnit.newBuilder()
                          .setOperation(Operation.ALIGNMENT_MATCH)
                          .setOperationLength(3)))
          .setAlignedSequence("ATG")
          .addAllAlignedQuality(ImmutableList.of(3, 4, 5))
          .build();

  PCollection<Read> readCollection = p.apply(Create.of(alignedRead));
  PAssert.that(readCollection).containsInAnyOrder(alignedRead);

  PCollection<KV<Position, ReadCounts>> combined =
      VerifyBamId.combineReads(readCollection, 1.0, "", referenceCounts);

  ReadCounts expectedFirst = new ReadCounts();
  expectedFirst.setRefFreq(0.8);
  expectedFirst.addReadQualityCount(ReadQualityCount.Base.REF, 3, 1L);

  ReadCounts expectedSecond = new ReadCounts();
  expectedSecond.setRefFreq(0.5);
  expectedSecond.addReadQualityCount(ReadQualityCount.Base.NONREF, 4, 1L);

  ReadCounts expectedThird = new ReadCounts();
  expectedThird.setRefFreq(0.6);
  expectedThird.addReadQualityCount(ReadQualityCount.Base.OTHER, 5, 1L);

  PAssert.that(combined)
      .containsInAnyOrder(
          KV.of(position1, expectedFirst),
          KV.of(position2, expectedSecond),
          KV.of(position3, expectedThird));

  p.run();
}
Example #26
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Windows the input into fixed windows of {@code TWO_MINUTES}, triggering at the watermark with
 * early firings {@code ONE_MINUTE} of processing time after the first element in a pane and a
 * late firing for every late element, with {@code TWO_MINUTES} of allowed lateness and
 * accumulating panes; then sums the integers per key and formats the sums as strings.
 *
 * @param input keyed integers to aggregate
 * @return the output of {@code FormatAsStrings} applied to the per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  Window<KV<String, Integer>> windowing =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                  .withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(TWO_MINUTES)
          .accumulatingFiredPanes();

  return input
      .apply(windowing)
      .apply(Sum.integersPerKey())
      .apply(ParDo.of(new FormatAsStrings()));
}
Example #27
Source File: FlattenTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Flattens a list of six PCollections built from alternating non-empty and empty line lists and
 * expects the output to contain the flattened contents of all the lists.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollections() {
  List<List<String>> lineLists =
      Arrays.asList(LINES, NO_LINES, LINES2, NO_LINES, LINES, NO_LINES);

  PCollection<String> flattened =
      makePCollectionListOfStrings(p, lineLists).apply(Flatten.pCollections());

  PAssert.that(flattened).containsInAnyOrder(flattenLists(lineLists));

  p.run();
}
Example #28
Source File: CreateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a PCollection whose elements are themselves {@code TimestampedValue<String>} and
 * expects the same two values back.
 */
@Test
@Category(NeedsRunner.class)
public void testCreateParameterizedType() throws Exception {
  PCollection<TimestampedValue<String>> created =
      p.apply(
          Create.of(
              TimestampedValue.of("a", new Instant(0)),
              TimestampedValue.of("b", new Instant(0))));

  PAssert.that(created)
      .containsInAnyOrder(
          TimestampedValue.of("a", new Instant(0)),
          TimestampedValue.of("b", new Instant(0)));

  p.run();
}
Example #29
Source File: RegexTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Splits a sentence with {@code Regex.split} on the pattern {@code \W+} and expects its nine
 * words as output.
 */
@Test
@Category(NeedsRunner.class)
public void testSplits() {
  PCollection<String> sentence =
      p.apply(Create.of("The quick brown fox jumps over the lazy dog"));

  PCollection<String> words = sentence.apply(Regex.split("\\W+"));

  PAssert.that(words)
      .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");

  p.run();
}
Example #30
Source File: SdkComponentsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Registers two distinct PCollections that are both named "foo": the first id is expected to be
 * exactly "foo", the second to contain "foo" without being equal to it, and both ids are looked
 * up in the components proto via {@code getPcollectionsOrThrow}.
 */
@Test
public void registerPCollectionExistingNameCollision() throws IOException {
  PCollection<Long> first =
      pipeline.apply("FirstCount", GenerateSequence.from(0)).setName("foo");
  String idOfFirst = components.registerPCollection(first);

  PCollection<Long> second =
      pipeline.apply("SecondCount", GenerateSequence.from(0)).setName("foo");
  String idOfSecond = components.registerPCollection(second);

  assertThat(idOfFirst, equalTo("foo"));
  assertThat(idOfSecond, containsString("foo"));
  assertThat(idOfSecond, not(equalTo("foo")));

  components.toComponents().getPcollectionsOrThrow(idOfFirst);
  components.toComponents().getPcollectionsOrThrow(idOfSecond);
}