org.apache.beam.sdk.transforms.Combine Java Examples
The following examples show how to use org.apache.beam.sdk.transforms.Combine. Each example is taken from an open-source project; its source file and license are noted above the code.
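Before the examples, here is a minimal sketch of the two most common entry points: Combine.globally, which reduces an entire PCollection to one value, and Combine.perKey, which reduces the values of each key in a keyed PCollection. The pipeline and input values below are illustrative, not taken from any of the examples that follow.

PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));

// Combine.globally reduces the whole collection to a single element: 10.
PCollection<Integer> total = numbers.apply(Combine.globally(Sum.ofIntegers()));

PCollection<KV<String, Integer>> keyed =
    pipeline.apply(Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)));

// Combine.perKey reduces the values of each key: ("a", 3) and ("b", 3).
PCollection<KV<String, Integer>> sums = keyed.apply(Combine.perKey(Sum.ofIntegers()));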
Example #1
Source File: AggregatorCombiner.java, from beam (Apache License 2.0)
public AggregatorCombiner(
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Coder<AccumT> accumulatorCoder,
    Coder<OutputT> outputCoder) {
  this.combineFn = combineFn;
  this.windowingStrategy = (WindowingStrategy<InputT, W>) windowingStrategy;
  this.timestampCombiner = windowingStrategy.getTimestampCombiner();
  this.accumulatorCoder =
      IterableCoder.of(
          WindowedValue.FullWindowedValueCoder.of(
              accumulatorCoder, windowingStrategy.getWindowFn().windowCoder()));
  this.outputCoder =
      IterableCoder.of(
          WindowedValue.FullWindowedValueCoder.of(
              outputCoder, windowingStrategy.getWindowFn().windowCoder()));
}
Example #2
Source File: HBaseIOIT.java, from beam (Apache License 2.0)
/** Read the test dataset from hbase and validate its contents. */
private void runRead() {
  PCollection<Result> tableRows =
      pipelineRead.apply(HBaseIO.read().withConfiguration(conf).withTableId(TABLE_NAME));

  PAssert.thatSingleton(tableRows.apply("Count All", Count.<Result>globally()))
      .isEqualTo((long) numberOfRows);

  PCollection<String> consolidatedHashcode =
      tableRows
          .apply(ParDo.of(new SelectNameFn()))
          .apply("Hash row contents", Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode)
      .containsInAnyOrder(TestRow.getExpectedHashForRowCount(numberOfRows));

  pipelineRead.run().waitUntilFinish();
}
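A note on the .withoutDefaults() call that recurs throughout these IO tests: by default Combine.globally emits the combiner's default value for an empty input, which Beam only allows under the default global windowing. When the input uses any other windowing, the pipeline must call withoutDefaults() (or asSingletonView()). A minimal sketch, assuming a hypothetical PCollection<String> lines:

PCollection<Long> perWindowCounts =
    lines
        .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))))
        // Required here: a non-globally-windowed Combine.globally cannot emit defaults.
        .apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());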
Example #3
Source File: CassandraIOIT.java, from beam (Apache License 2.0)
private void runRead() {
  PCollection<Scientist> output =
      pipelineRead.apply(
          CassandraIO.<Scientist>read()
              .withHosts(options.getCassandraHost())
              .withPort(options.getCassandraPort())
              .withMinNumberOfSplits(20)
              .withKeyspace(KEYSPACE)
              .withTable(TABLE)
              .withEntity(Scientist.class)
              .withCoder(SerializableCoder.of(Scientist.class)));

  PCollection<String> consolidatedHashcode =
      output
          .apply(ParDo.of(new SelectNameFn()))
          .apply("Hash row contents", Combine.globally(new HashingFn()).withoutDefaults());

  PAssert.thatSingleton(consolidatedHashcode)
      .isEqualTo(TestRow.getExpectedHashForRowCount(options.getNumberOfRecords()));

  pipelineRead.run().waitUntilFinish();
}
Example #4
Source File: HadoopFormatIOElasticTest.java, from beam (Apache License 2.0)
/**
 * Test to read data from embedded Elasticsearch instance and verify whether data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
  Configuration conf = getConfiguration();
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #5
Source File: HadoopFormatIOCassandraIT.java, from beam (Apache License 2.0)
/** This test reads data from the Cassandra instance and verifies if data is read successfully. */
@Test
public void testHIFReadForCassandra() {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  String expectedHashCode = "1a30ad400afe4ebf5fde75f5d2d95408";
  Long expectedRecordsCount = 1000L;
  Configuration conf = getConfiguration(options);
  PCollection<KV<Long, String>> cassandraData =
      pipeline.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
      .isEqualTo(expectedRecordsCount);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #6
Source File: FlinkStreamingTransformTranslators.java, from beam (Apache License 2.0)
@Override
boolean canTranslate(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    FlinkStreamingTranslationContext context) {
  // if we have a merging window strategy and side inputs we cannot
  // translate as a proper combine. We have to group and then run the combine
  // over the final grouped values.
  PCollection<KV<K, InputT>> input = context.getInput(transform);

  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();

  return windowingStrategy.getWindowFn().isNonMerging()
      || ((Combine.PerKey) transform).getSideInputs().isEmpty();
}
Example #7
Source File: HadoopFormatIOCassandraTest.java, from beam (Apache License 2.0)
/**
 * Test to read data from embedded Cassandra instance and verify whether data is read
 * successfully.
 */
@Test
public void testHIFReadForCassandra() {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
  Configuration conf = getConfiguration();
  PCollection<KV<Long, String>> cassandraData =
      p.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  // Verify the count of data retrieved from Cassandra matches expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
      .isEqualTo(TEST_DATA_ROW_COUNT);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  p.run().waitUntilFinish();
}
Example #8
Source File: SparkCombineFnTest.java, from beam (Apache License 2.0)
private static Combine.CombineFn<Integer, Long, Long> getSumFn() {
  return new Combine.CombineFn<Integer, Long, Long>() {

    @Override
    public Long createAccumulator() {
      return 0L;
    }

    @Override
    public Long addInput(Long mutableAccumulator, Integer input) {
      return mutableAccumulator + input;
    }

    @Override
    public Long mergeAccumulators(Iterable<Long> accumulators) {
      return StreamSupport.stream(accumulators.spliterator(), false).mapToLong(e -> e).sum();
    }

    @Override
    public Long extractOutput(Long accumulator) {
      return accumulator;
    }
  };
}
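For context, a CombineFn like this plugs straight into Combine.globally or Combine.perKey. A minimal sketch of how getSumFn() might be applied (the pipeline and input values are hypothetical):

PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));
// Emits a single Long accumulated by the CombineFn above: 10L.
PCollection<Long> total = numbers.apply(Combine.globally(getSumFn()));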
Example #9
Source File: CombineTest.java, from beam (Apache License 2.0)
@Test
public void testBinaryCombineWithSlidingWindows() {
  PCollection<Integer> input =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(3, new Instant(2)),
                  TimestampedValue.of(5, new Instant(3))))
          .apply(Window.into(SlidingWindows.of(Duration.millis(3)).every(Duration.millis(1))))
          .apply(
              Combine.globally(
                      Combine.BinaryCombineFn.of(
                          (SerializableBiFunction<Integer, Integer, Integer>)
                              (integer1, integer2) ->
                                  integer1 > integer2 ? integer1 : integer2))
                  .withoutDefaults());
  PAssert.that(input).containsInAnyOrder(1, 3, 5, 5, 5);
  pipeline.run();
}
Example #10
Source File: GroupByKeyTranslator.java, from beam (Apache License 2.0)
@SuppressWarnings("unchecked") private static <K, InputT, OutputT> SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn( PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform, Pipeline pipeline, KvCoder<K, InputT> kvInputCoder) { if (transform instanceof GroupByKey) { return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>) SystemReduceFn.buffering(kvInputCoder.getValueCoder()); } else if (transform instanceof Combine.PerKey) { final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> combineFn = ((Combine.PerKey) transform).getFn(); return SystemReduceFn.combining( kvInputCoder.getKeyCoder(), AppliedCombineFn.withInputCoder(combineFn, pipeline.getCoderRegistry(), kvInputCoder)); } else { throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey."); } }
Example #11
Source File: CombineTranslation.java, from beam (Apache License 2.0)
@Override
public FunctionSpec translate(
    AppliedPTransform<?, ?, Combine.PerKey<?, ?, ?>> transform, SdkComponents components)
    throws IOException {
  if (transform.getTransform().getSideInputs().isEmpty()) {
    GlobalCombineFn<?, ?, ?> combineFn = transform.getTransform().getFn();
    Coder<?> accumulatorCoder = extractAccumulatorCoder(combineFn, (AppliedPTransform) transform);
    return FunctionSpec.newBuilder()
        .setUrn(getUrn(transform.getTransform()))
        .setPayload(combinePayload(combineFn, accumulatorCoder, components).toByteString())
        .build();
  } else {
    // Combines with side inputs are translated as generic composites, which have a blank
    // FunctionSpec.
    return null;
  }
}
Example #12
Source File: CombineTranslation.java, from beam (Apache License 2.0)
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<
            PCollection<KV<K, Iterable<InputT>>>, ?, Combine.GroupedValues<K, InputT, ?>>
        transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, Iterable<InputT>>> mainInput =
        (PCollection<KV<K, Iterable<InputT>>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    KvCoder<K, Iterable<InputT>> kvCoder = (KvCoder<K, Iterable<InputT>>) mainInput.getCoder();
    IterableCoder<InputT> iterCoder = (IterableCoder<InputT>) kvCoder.getValueCoder();
    return combineFn.getAccumulatorCoder(
        transform.getPipeline().getCoderRegistry(), iterCoder.getElemCoder());
  } catch (CannotProvideCoderException e) {
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
Example #13
Source File: CombineTest.java, from beam (Apache License 2.0)
@Test
public void testCombineGloballyPreservesWindowing() {
  PCollection<Integer> input =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(2)),
                  TimestampedValue.of(3, new Instant(11)),
                  TimestampedValue.of(4, new Instant(3)),
                  TimestampedValue.of(5, new Instant(11)),
                  TimestampedValue.of(6, new Instant(12))))
          .apply(Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults());
  PAssert.that(input).containsInAnyOrder(7, 14);
  // Without running the pipeline, the PAssert above would never execute.
  pipeline.run();
}
Example #14
Source File: WindowTest.java, from beam (Apache License 2.0)
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindows() {
  Instant startInstant = new Instant(0L);
  PCollection<String> inputCollection =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of("big", startInstant.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("small1", startInstant.plus(Duration.standardSeconds(20))),
              // This one will be outside of bigWindow thus not merged
              TimestampedValue.of("small2", startInstant.plus(Duration.standardSeconds(39)))));
  PCollection<String> windowedCollection =
      inputCollection.apply(Window.into(new CustomWindowFn<>()));
  PCollection<Long> count =
      windowedCollection.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
  // "small1" and "big" elements merged into bigWindow; "small2" not merged
  // because timestamp is not in bigWindow
  PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
  pipeline.run();
}
Example #15
Source File: WindowTest.java, from beam (Apache License 2.0)
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindowsKeyedCollection() {
  Instant startInstant = new Instant(0L);
  PCollection<KV<Integer, String>> inputCollection =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of(
                  KV.of(0, "big"), startInstant.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(
                  KV.of(1, "small1"), startInstant.plus(Duration.standardSeconds(20))),
              // This element is not contained within the bigWindow and not merged
              TimestampedValue.of(
                  KV.of(2, "small2"), startInstant.plus(Duration.standardSeconds(39)))));
  PCollection<KV<Integer, String>> windowedCollection =
      inputCollection.apply(Window.into(new CustomWindowFn<>()));
  PCollection<Long> count =
      windowedCollection.apply(
          Combine.globally(Count.<KV<Integer, String>>combineFn()).withoutDefaults());
  // "small1" and "big" elements merged into bigWindow; "small2" not merged
  // because it is not contained in bigWindow
  PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
  pipeline.run();
}
Example #16
Source File: CombineTranslation.java, from beam (Apache License 2.0)
@Override
public FunctionSpec translate(
    AppliedPTransform<?, ?, Combine.Globally<?, ?>> transform, SdkComponents components)
    throws IOException {
  if (transform.getTransform().getSideInputs().isEmpty()) {
    return FunctionSpec.newBuilder()
        .setUrn(getUrn(transform.getTransform()))
        .setPayload(
            payloadForCombineGlobally((AppliedPTransform) transform, components).toByteString())
        .build();
  } else {
    // Combines with side inputs are translated as generic composites, which have a blank
    // FunctionSpec.
    return null;
  }
}
Example #17
Source File: CombineRunnersTest.java, from beam (Apache License 2.0)
@Before
public void createPipeline() throws Exception {
  // Create pipeline with an input pCollection, combine, and output pCollection.
  TestCombineFn combineFn = new TestCombineFn();
  Combine.PerKey<String, String, Integer> combine = Combine.perKey(combineFn);

  Pipeline p = Pipeline.create();
  PCollection<KV<String, String>> inputPCollection = p.apply(Create.of(KV.of("unused", "0")));
  inputPCollection.setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
  PCollection<KV<String, Integer>> outputPCollection =
      inputPCollection.apply(TEST_COMBINE_ID, combine);
  outputPCollection.setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));

  // Create FnApi protos needed for the runner.
  SdkComponents sdkComponents = SdkComponents.create(p.getOptions());
  pProto = PipelineTranslation.toProto(p, sdkComponents);
  inputPCollectionId = sdkComponents.registerPCollection(inputPCollection);
  outputPCollectionId = sdkComponents.registerPCollection(outputPCollection);
  pTransform = pProto.getComponents().getTransformsOrThrow(TEST_COMBINE_ID);
}
Example #18
Source File: HCatalogIOIT.java, from beam (Apache License 2.0)
@Test
public void writeAndReadAll() {
  pipelineWrite
      .apply("Generate sequence", Create.of(buildHCatRecords(options.getNumberOfRecords())))
      .apply(
          HCatalogIO.write()
              .withConfigProperties(configProperties)
              .withDatabase(options.getHCatalogHiveDatabaseName())
              .withTable(tableName));
  pipelineWrite.run().waitUntilFinish();

  PCollection<String> testRecords =
      pipelineRead
          .apply(
              HCatalogIO.read()
                  .withConfigProperties(configProperties)
                  .withDatabase(options.getHCatalogHiveDatabaseName())
                  .withTable(tableName))
          .apply(ParDo.of(new CreateHCatFn()));

  PCollection<String> consolidatedHashcode =
      testRecords.apply("Calculate hashcode", Combine.globally(new HashingFn()));
  String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  pipelineRead.run().waitUntilFinish();
}
Example #19
Source File: BucketingFunction.java, from beam (Apache License 2.0)
public BucketingFunction(
    long bucketWidthMs,
    int numSignificantBuckets,
    int numSignificantSamples,
    Combine.BinaryCombineLongFn function) {
  this.bucketWidthMs = bucketWidthMs;
  this.numSignificantBuckets = numSignificantBuckets;
  this.numSignificantSamples = numSignificantSamples;
  this.function = function;
  this.buckets = new HashMap<>();
}
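Beam's stock Combine.BinaryCombineLongFn implementations, such as Sum.ofLongs(), Min.ofLongs(), and Max.ofLongs(), can be passed as the function argument. A minimal sketch with illustrative parameter values (the bucket width and counts below are assumptions, not values from the source):

// Illustrative configuration: 60-second buckets, 10 significant buckets,
// 100 significant samples, aggregated with a long sum.
BucketingFunction bucketedSum = new BucketingFunction(60_000L, 10, 100, Sum.ofLongs());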
Example #20
Source File: AggregateRuntime.java, from components (Apache License 2.0)
@Override
public PCollection<IndexedRecord> expand(PCollection<IndexedRecord> indexedRecordPCollection) {
  // Return an empty result if there are no operations in the list. This is normally not a
  // permitted operation.
  if (operationFieldPathList.size() == 0)
    return (PCollection<IndexedRecord>) (PCollection) indexedRecordPCollection.getPipeline().apply(
        Create.empty(AvroCoder.of(AvroUtils.createEmptySchema())));

  PCollection<KV<IndexedRecord, IndexedRecord>> kv = indexedRecordPCollection
      .apply(ParDo.of(new ExtractKVFn(new ArrayList<>(groupByFieldPathList),
          new ArrayList<>(operationFieldPathList))))
      .setCoder(KvCoder.of(LazyAvroCoder.of(), LazyAvroCoder.of()));

  PCollection<KV<IndexedRecord, IndexedRecord>> aggregateResult = kv
      .apply(Combine.<IndexedRecord, IndexedRecord, IndexedRecord> perKey(
          new AggregateCombineFn(properties)))
      .setCoder(KvCoder.of(LazyAvroCoder.of(), NullableCoder.of(LazyAvroCoder.of())));

  PCollection<IndexedRecord> result = aggregateResult
      .apply(ParDo.of(
          new DoFn<KV<IndexedRecord, IndexedRecord>, KV<IndexedRecord, IndexedRecord>>() {

            @ProcessElement
            public void processElement(ProcessContext c) {
              /**
               * Filter null value when AggregateCombineFn for nothing, see {@link
               * org.talend.components.processing.runtime.aggregate.AggregateCombineFn#extractOutput(AggregateCombineFn.AggregateAccumulator)}
               */
              if (c.element().getValue() != null) {
                c.output(c.element());
              }
            }
          }))
      .apply(ParDo.of(new MergeKVFn()))
      .setCoder(LazyAvroCoder.of());
  return result;
}
Example #21
Source File: KinesisIOIT.java, from beam (Apache License 2.0)
/** Read test dataset from Kinesis stream. */
private void runRead() {
  PCollection<KinesisRecord> output =
      pipelineRead.apply(
          KinesisIO.read()
              .withStreamName(options.getAwsKinesisStream())
              .withAWSClientsProvider(
                  options.getAwsAccessKey(),
                  options.getAwsSecretKey(),
                  Regions.fromName(options.getAwsKinesisRegion()))
              .withMaxNumRecords(numberOfRows)
              // to prevent endless running in case of error
              .withMaxReadTime(Duration.standardMinutes(10))
              .withInitialPositionInStream(InitialPositionInStream.AT_TIMESTAMP)
              .withInitialTimestampInStream(now)
              .withRequestRecordsLimit(1000));

  PAssert.thatSingleton(output.apply("Count All", Count.globally()))
      .isEqualTo((long) numberOfRows);

  PCollection<String> consolidatedHashcode =
      output
          .apply(ParDo.of(new ExtractDataValues()))
          .apply("Hash row contents", Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode)
      .containsInAnyOrder(TestRow.getExpectedHashForRowCount(numberOfRows));

  pipelineRead.run().waitUntilFinish();
}
Example #22
Source File: FlinkStateInternals.java, from beam (Apache License 2.0)
FlinkCombiningState(
    KeyedStateBackend<ByteBuffer> flinkStateBackend,
    String stateId,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    StateNamespace namespace,
    Coder<AccumT> accumCoder) {
  this.namespace = namespace;
  this.stateId = stateId;
  this.combineFn = combineFn;
  this.flinkStateBackend = flinkStateBackend;

  flinkStateDescriptor =
      new ValueStateDescriptor<>(stateId, new CoderTypeSerializer<>(accumCoder));
}
Example #23
Source File: SamzaPublishViewTransformOverride.java, from beam (Apache License 2.0)
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  // This actually creates a branch in the graph that publishes the view but then returns
  // the original input. This is copied from the Flink runner.
  input
      .apply(Combine.globally(new Concatenate<ElemT>()).withoutDefaults())
      .apply(new SamzaPublishView<>(view));
  return input;
}
Example #24
Source File: CombineTranslationTest.java, from beam (Apache License 2.0)
@Test
public void testToProto() throws Exception {
  PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3));
  input.apply(Combine.globally(combineFn));
  final AtomicReference<AppliedPTransform<?, ?, Combine.Globally<?, ?>>> combine =
      new AtomicReference<>();
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void leaveCompositeTransform(Node node) {
          if (node.getTransform() instanceof Combine.Globally) {
            checkState(combine.get() == null);
            combine.set((AppliedPTransform) node.toAppliedPTransform(getPipeline()));
          }
        }
      });
  checkState(combine.get() != null);
  assertEquals(combineFn, combine.get().getTransform().getFn());

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  CombinePayload combineProto =
      CombineTranslation.CombineGloballyPayloadTranslator.payloadForCombineGlobally(
          (AppliedPTransform) combine.get(), sdkComponents);
  RunnerApi.Components componentsProto = sdkComponents.toComponents();

  assertEquals(
      combineFn.getAccumulatorCoder(pipeline.getCoderRegistry(), input.getCoder()),
      getAccumulatorCoder(combineProto, RehydratedComponents.forComponents(componentsProto)));
  assertEquals(
      combineFn,
      SerializableUtils.deserializeFromByteArray(
          combineProto.getCombineFn().getPayload().toByteArray(), "CombineFn"));
}
Example #25
Source File: Group.java, from beam (Apache License 2.0)
@Override
public PCollection<Row> expand(PCollection<InputT> input) {
  SchemaAggregateFn.Inner fn = schemaAggregateFn.withSchema(input.getSchema());
  return input
      .apply("toRows", Convert.toRows())
      .apply("Global Combine", Combine.globally(fn))
      .setRowSchema(fn.getOutputSchema());
}
Example #26
Source File: CombineTranslation.java, from beam (Apache License 2.0)
/** Produces a {@link RunnerApi.CombinePayload} from a {@link Combine.Globally}. */
@VisibleForTesting
static <InputT, OutputT> CombinePayload payloadForCombineGlobally(
    final AppliedPTransform<
            PCollection<InputT>, PCollection<OutputT>, Combine.Globally<InputT, OutputT>>
        transform,
    final SdkComponents components)
    throws IOException {
  GlobalCombineFn<?, ?, ?> combineFn = transform.getTransform().getFn();
  Coder<?> accumulatorCoder = extractAccumulatorCoder(combineFn, (AppliedPTransform) transform);
  return combinePayload(combineFn, accumulatorCoder, components);
}
Example #27
Source File: KafkaIOIT.java, from beam (Apache License 2.0)
@Test
public void testKafkaIOReadsAndWritesCorrectly() throws IOException {
  writePipeline
      .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
      .apply("Write to Kafka", writeToKafka());

  PCollection<String> hashcode =
      readPipeline
          .apply("Read from Kafka", readFromKafka())
          .apply(
              "Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)))
          .apply("Map records to strings", MapElements.via(new MapKafkaRecordsToStrings()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()).withoutDefaults());

  PAssert.thatSingleton(hashcode).isEqualTo(expectedHashcode);

  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  PipelineResult readResult = readPipeline.run();
  PipelineResult.State readState =
      readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
  cancelIfTimeouted(readResult, readState);

  Set<NamedTestResult> metrics = readMetrics(writeResult, readResult);
  IOITMetrics.publish(options.getBigQueryDataset(), options.getBigQueryTable(), metrics);
  IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, metrics, settings);
}
Example #28
Source File: CombineValuesFnFactoryTest.java, from beam (Apache License 2.0)
@Test
public void testCombineValuesFnAll() throws Exception {
  TestReceiver receiver = new TestReceiver();

  Combine.CombineFn<Integer, CountSum, String> combiner = (new MeanInts());
  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.ALL,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("a", Arrays.asList(5, 6, 7))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("b", Arrays.asList(1, 3, 7))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("c", Arrays.asList(3, 6, 8, 9))));
  combineParDoFn.finishBundle();

  Object[] expectedReceivedElems = {
    WindowedValue.valueInGlobalWindow(KV.of("a", String.format("%.1f", 6.0))),
    WindowedValue.valueInGlobalWindow(KV.of("b", String.format("%.1f", 3.7))),
    WindowedValue.valueInGlobalWindow(KV.of("c", String.format("%.1f", 6.5))),
  };
  assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
}
Example #29
Source File: DataflowPipelineTranslator.java, from beam (Apache License 2.0)
private <K, InputT, OutputT> void translateHelper(
    final CombineGroupedValues<K, InputT, OutputT> primitiveTransform,
    TranslationContext context) {
  Combine.GroupedValues<K, InputT, OutputT> originalTransform =
      primitiveTransform.getOriginalCombine();
  StepTranslationContext stepContext = context.addStep(primitiveTransform, "CombineValues");
  translateInputs(
      stepContext,
      context.getInput(primitiveTransform),
      originalTransform.getSideInputs(),
      context);

  AppliedCombineFn<? super K, ? super InputT, ?, OutputT> fn =
      originalTransform.getAppliedFn(
          context.getInput(primitiveTransform).getPipeline().getCoderRegistry(),
          context.getInput(primitiveTransform).getCoder(),
          context.getInput(primitiveTransform).getWindowingStrategy());

  stepContext.addEncodingInput(fn.getAccumulatorCoder());

  List<String> experiments = context.getPipelineOptions().getExperiments();
  boolean isFnApi = experiments != null && experiments.contains("beam_fn_api");

  if (isFnApi) {
    String ptransformId =
        context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentParent());
    stepContext.addInput(PropertyNames.SERIALIZED_FN, ptransformId);
  } else {
    stepContext.addInput(
        PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeToByteArray(fn)));
  }

  stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(primitiveTransform));
}
Example #30
Source File: MultiStepCombineTest.java, from beam (Apache License 2.0)
@Test
public void testMultiStepCombineWindowed() {
  SlidingWindows windowFn = SlidingWindows.of(Duration.millis(6L)).every(Duration.millis(3L));
  PCollection<KV<String, Long>> combined =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of(KV.of("foo", 1L), new Instant(1L)),
                  TimestampedValue.of(KV.of("bar", 2L), new Instant(2L)),
                  TimestampedValue.of(KV.of("bizzle", 3L), new Instant(3L)),
                  TimestampedValue.of(KV.of("bar", 4L), new Instant(4L)),
                  TimestampedValue.of(KV.of("bizzle", 11L), new Instant(11L))))
          .apply(Window.into(windowFn))
          .apply(Combine.perKey(new MultiStepCombineFn()));

  PAssert.that("Windows should combine only elements in their windows", combined)
      .inWindow(new IntervalWindow(new Instant(0L), Duration.millis(6L)))
      .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 6L), KV.of("bizzle", 3L));
  PAssert.that("Elements should appear in all the windows they are assigned to", combined)
      .inWindow(new IntervalWindow(new Instant(-3L), Duration.millis(6L)))
      .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 2L));
  PAssert.that(combined)
      .inWindow(new IntervalWindow(new Instant(6L), Duration.millis(6L)))
      .containsInAnyOrder(KV.of("bizzle", 11L));
  PAssert.that(combined)
      .containsInAnyOrder(
          KV.of("foo", 1L),
          KV.of("foo", 1L),
          KV.of("bar", 6L),
          KV.of("bar", 2L),
          KV.of("bar", 4L),
          KV.of("bizzle", 11L),
          KV.of("bizzle", 11L),
          KV.of("bizzle", 3L),
          KV.of("bizzle", 3L));
  pipeline.run();
}