org.apache.beam.sdk.transforms.Values Java Examples
The following examples show how to use org.apache.beam.sdk.transforms.Values.
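For orientation before the project examples: Values.create() turns a PCollection<KV<K, V>> into a PCollection<V> by dropping the keys. The following is a minimal, self-contained sketch of the transform in isolation; the class and test names are illustrative and not taken from any of the projects below.

import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Values;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Rule;
import org.junit.Test;

public class ValuesSketchTest {

  @Rule public final transient TestPipeline p = TestPipeline.create();

  @Test
  public void dropsKeysAndKeepsValues() {
    // Start from a keyed collection; the coder is given explicitly for clarity.
    PCollection<KV<String, Integer>> keyed =
        p.apply(
            Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("c", 3))
                .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())));

    // Values.create() discards the keys, leaving only the values.
    PCollection<Integer> values = keyed.apply(Values.create());

    PAssert.that(values).containsInAnyOrder(1, 2, 3);
    p.run().waitUntilFinish();
  }
}

The examples that follow show the same transform used inside real pipelines, typically right after a GroupByKey, Reshuffle, or keyed IO read.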
Example #1
Source File: CreateStreamTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testElementsAtAlmostPositiveInfinity() throws IOException {
  Instant endOfGlobalWindow = GlobalWindow.INSTANCE.maxTimestamp();
  CreateStream<String> source =
      CreateStream.of(StringUtf8Coder.of(), batchDuration())
          .nextBatch(
              TimestampedValue.of("foo", endOfGlobalWindow),
              TimestampedValue.of("bar", endOfGlobalWindow))
          .advanceNextBatchWatermarkToInfinity();

  FixedWindows windows = FixedWindows.of(Duration.standardHours(6));
  PCollection<String> windowedValues =
      p.apply(source)
          .apply(Window.into(windows))
          .apply(WithKeys.of(1))
          .apply(GroupByKey.create())
          .apply(Values.create())
          .apply(Flatten.iterables());

  PAssert.that(windowedValues)
      .inWindow(windows.assignWindow(GlobalWindow.INSTANCE.maxTimestamp()))
      .containsInAnyOrder("foo", "bar");
  p.run();
}
Example #2
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@Override
public PCollection<Void> expand(PCollection<MergeInfo> input) {
  final MergeStatementBuilder mergeBuilder = new MergeStatementBuilder(mergeConfiguration);
  return input
      .apply(
          MapElements.into(
                  TypeDescriptors.kvs(
                      TypeDescriptors.strings(), TypeDescriptor.of(MergeInfo.class)))
              .via(mergeInfo -> KV.of(mergeInfo.getReplicaTable(), mergeInfo)))
      .apply(new TriggerPerKeyOnFixedIntervals<String, MergeInfo>(windowDuration))
      .apply(Values.create())
      .apply(
          MapElements.into(TypeDescriptors.strings())
              .via(mergeInfo -> {
                return mergeBuilder.buildMergeStatement(
                    mergeInfo.getReplicaTable(),
                    mergeInfo.getStagingTable(),
                    mergeInfo.getAllPkFields(),
                    mergeInfo.getAllFields());
              }))
      .apply(ParDo.of(new BigQueryStatementIssuingFn(this.testBigQueryClient)))
      .apply(
          MapElements.into(TypeDescriptors.voids())
              .via(
                  whatever ->
                      (Void) null)); // TODO(pabloem) Remove this line and find a return type
}
Example #3
Source File: ApproximateDistinctTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void perKey() {
  final int cardinality = 1000;
  final int p = 15;
  final double expectedErr = 1.04 / Math.sqrt(p);

  List<Integer> stream = new ArrayList<>();
  for (int i = 1; i <= cardinality; i++) {
    stream.addAll(Collections.nCopies(2, i));
  }
  Collections.shuffle(stream);

  PCollection<Long> results =
      tp.apply("per key stream", Create.of(stream))
          .apply("create keys", WithKeys.of(1))
          .apply(
              "per key cardinality",
              ApproximateDistinct.<Integer, Integer>perKey().withPrecision(p))
          .apply("extract values", Values.create());

  PAssert.that("Verify Accuracy for cardinality per key", results)
      .satisfies(new VerifyAccuracy(cardinality, expectedErr));

  tp.run();
}
Example #4
Source File: TestExpansionService.java From beam with Apache License 2.0 | 6 votes |
@Override
public PTransform<PCollection<GenericRecord>, PCollection<String>> buildExternal(
    StringConfiguration configuration) {
  return new PTransform<PCollection<GenericRecord>, PCollection<String>>() {
    @Override
    public PCollection<String> expand(PCollection<GenericRecord> input) {
      return input
          .apply(
              FileIO.<GenericRecord>write()
                  .via(ParquetIO.sink(schema))
                  .to(configuration.data))
          .getPerDestinationOutputFilenames()
          .apply(Values.create());
    }
  };
}
Example #5
Source File: FileIO.java From beam with Apache License 2.0 | 6 votes |
@Override
public PCollection<MatchResult.Metadata> expand(PCollection<String> input) {
  PCollection<MatchResult.Metadata> res;
  if (getConfiguration().getWatchInterval() == null) {
    res =
        input.apply(
            "Match filepatterns",
            ParDo.of(new MatchFn(getConfiguration().getEmptyMatchTreatment())));
  } else {
    res =
        input
            .apply(
                "Continuously match filepatterns",
                Watch.growthOf(
                        Contextful.of(new MatchPollFn(), Requirements.empty()),
                        new ExtractFilenameFn())
                    .withPollInterval(getConfiguration().getWatchInterval())
                    .withTerminationPerInput(getConfiguration().getWatchTerminationCondition()))
            .apply(Values.create());
  }
  return res.apply(Reshuffle.viaRandomKey());
}
Example #6
Source File: GatherAllPanes.java From beam with Apache License 2.0 | 6 votes |
@Override
public PCollection<Iterable<ValueInSingleWindow<T>>> expand(PCollection<T> input) {
  WindowFn<?, ?> originalWindowFn = input.getWindowingStrategy().getWindowFn();

  return input
      .apply(Reify.windows())
      .apply(
          WithKeys.<Integer, ValueInSingleWindow<T>>of(0)
              .withKeyType(new TypeDescriptor<Integer>() {}))
      .apply(
          Window.into(
                  new IdentityWindowFn<KV<Integer, ValueInSingleWindow<T>>>(
                      originalWindowFn.windowCoder()))
              .triggering(Never.ever())
              .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness())
              .discardingFiredPanes())
      // all values have the same key so they all appear as a single output element
      .apply(GroupByKey.create())
      .apply(Values.create())
      .setWindowingStrategyInternal(input.getWindowingStrategy());
}
Example #7
Source File: WriteFiles.java From beam with Apache License 2.0 | 6 votes |
@Override
public PCollection<List<ResultT>> expand(PCollection<ResultT> input) {
  if (getWindowedWrites()) {
    // Reshuffle the results to make them stable against retries.
    // Use a single void key to maximize size of bundles for finalization.
    return input
        .apply("Add void key", WithKeys.of((Void) null))
        .apply("Reshuffle", Reshuffle.of())
        .apply("Drop key", Values.create())
        .apply("Gather bundles", ParDo.of(new GatherBundlesPerWindowFn<>()))
        .setCoder(ListCoder.of(resultCoder))
        // Reshuffle one more time to stabilize the contents of the bundle lists to finalize.
        .apply(Reshuffle.viaRandomKey());
  } else {
    // Pass results via a side input rather than reshuffle, because we need to get an empty
    // iterable to finalize if there are no results.
    return input
        .getPipeline()
        .apply(Reify.viewInGlobalWindow(input.apply(View.asList()), ListCoder.of(resultCoder)));
  }
}
Example #8
Source File: TestStreamTest.java From beam with Apache License 2.0 | 6 votes |
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testElementsAtAlmostPositiveInfinity() {
  Instant endOfGlobalWindow = GlobalWindow.INSTANCE.maxTimestamp();
  TestStream<String> stream =
      TestStream.create(StringUtf8Coder.of())
          .addElements(
              TimestampedValue.of("foo", endOfGlobalWindow),
              TimestampedValue.of("bar", endOfGlobalWindow))
          .advanceWatermarkToInfinity();

  FixedWindows windows = FixedWindows.of(Duration.standardHours(6));
  PCollection<String> windowedValues =
      p.apply(stream)
          .apply(into(windows))
          .apply(WithKeys.of(1))
          .apply(GroupByKey.create())
          .apply(Values.create())
          .apply(Flatten.iterables());

  PAssert.that(windowedValues)
      .inWindow(windows.assignWindow(endOfGlobalWindow))
      .containsInAnyOrder("foo", "bar");
  p.run();
}
Example #9
Source File: BigQueryToTableIT.java From beam with Apache License 2.0 | 6 votes |
private void runBigQueryToTablePipeline(BigQueryToTableOptions options) {
  Pipeline p = Pipeline.create(options);
  BigQueryIO.Read bigQueryRead = BigQueryIO.read().fromQuery(options.getQuery());
  if (options.getUsingStandardSql()) {
    bigQueryRead = bigQueryRead.usingStandardSql();
  }
  PCollection<TableRow> input = p.apply(bigQueryRead);
  if (options.getReshuffle()) {
    input =
        input
            .apply(WithKeys.<Void, TableRow>of((Void) null))
            .setCoder(KvCoder.of(VoidCoder.of(), TableRowJsonCoder.of()))
            .apply(Reshuffle.<Void, TableRow>of())
            .apply(Values.<TableRow>create());
  }
  input.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(options.getOutputSchema())
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

  p.run().waitUntilFinish();
}
Example #10
Source File: HadoopFormatIOElasticTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Test to read data from embedded Elasticsearch instance and verify whether data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
  Configuration conf = getConfiguration();
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #11
Source File: HadoopFormatIOCassandraIT.java From beam with Apache License 2.0 | 6 votes |
/** This test reads data from the Cassandra instance and verifies if data is read successfully. */
@Test
public void testHIFReadForCassandra() {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  String expectedHashCode = "1a30ad400afe4ebf5fde75f5d2d95408";
  Long expectedRecordsCount = 1000L;
  Configuration conf = getConfiguration(options);
  PCollection<KV<Long, String>> cassandraData =
      pipeline.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
      .isEqualTo(expectedRecordsCount);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #12
Source File: HadoopFormatIOElasticIT.java From beam with Apache License 2.0 | 6 votes |
/**
 * This test reads data from the Elasticsearch instance and verifies whether data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() throws SecurityException {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  final long expectedRowCount = 1000L;
  String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392";
  Configuration conf = getConfiguration(options);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PCollection<Long> count = esData.apply(Count.globally());
  PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #13
Source File: ParquetIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testWriteAndReadFiles() {
  List<GenericRecord> records = generateGenericRecords(1000);

  PCollection<GenericRecord> writeThenRead =
      mainPipeline
          .apply(Create.of(records).withCoder(AvroCoder.of(SCHEMA)))
          .apply(
              FileIO.<GenericRecord>write()
                  .via(ParquetIO.sink(SCHEMA))
                  .to(temporaryFolder.getRoot().getAbsolutePath()))
          .getPerDestinationOutputFilenames()
          .apply(Values.create())
          .apply(FileIO.matchAll())
          .apply(FileIO.readMatches())
          .apply(ParquetIO.readFiles(SCHEMA));

  PAssert.that(writeThenRead).containsInAnyOrder(records);

  mainPipeline.run().waitUntilFinish();
}
Example #14
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testUnboundedSourceWithSingleTopic() {
  // same as testUnboundedSource, but with single topic
  int numElements = 1000;
  String topic = "my_topic";

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("none")
          .withTopic("my_topic")
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  ImmutableList.of(topic), 10, numElements, OffsetResetStrategy.EARLIEST))
          .withMaxNumRecords(numElements)
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  addCountingAsserts(input, numElements);
  p.run();
}
Example #15
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testUnboundedSourceWithExplicitPartitions() {
  int numElements = 1000;

  List<String> topics = ImmutableList.of("test");

  KafkaIO.Read<byte[], Long> reader =
      KafkaIO.<byte[], Long>read()
          .withBootstrapServers("none")
          .withTopicPartitions(ImmutableList.of(new TopicPartition("test", 5)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 10 partitions
          .withKeyDeserializer(ByteArrayDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(numElements / 10);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  // assert that every element is a multiple of 5.
  PAssert.that(input).satisfies(new AssertMultipleOf(5));

  PAssert.thatSingleton(input.apply(Count.globally())).isEqualTo(numElements / 10L);

  p.run();
}
Example #16
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testUnboundedSourceTimestamps() {
  int numElements = 1000;

  PCollection<Long> input =
      p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, numElements);

  PCollection<Long> diffs =
      input
          .apply("TimestampDiff", ParDo.of(new ElementValueDiff()))
          .apply("DistinctTimestamps", Distinct.create());
  // This assert also confirms that diffs only has one unique value.
  PAssert.thatSingleton(diffs).isEqualTo(0L);

  p.run();
}
Example #17
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testUnboundedSourceLogAppendTimestamps() {
  // LogAppendTime (server side timestamp) for records is set based on record index
  // in MockConsumer above. Ensure that those exact timestamps are set by the source.
  int numElements = 1000;

  PCollection<Long> input =
      p.apply(mkKafkaReadTransform(numElements, null).withLogAppendTime().withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, numElements);

  PCollection<Long> diffs =
      input
          .apply(
              MapElements.into(TypeDescriptors.longs())
                  .via(t -> LOG_APPEND_START_TIME.plus(Duration.standardSeconds(t)).getMillis()))
          .apply("TimestampDiff", ParDo.of(new ElementValueDiff()))
          .apply("DistinctTimestamps", Distinct.create());

  // This assert also confirms that diff only has one unique value.
  PAssert.thatSingleton(diffs).isEqualTo(0L);

  p.run();
}
Example #18
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testUnboundedSourceStartReadTime() {
  assumeTrue(new ConsumerSpEL().hasOffsetsForTimes());

  int numElements = 1000;
  // In this MockConsumer, we let the elements of the time and offset equal and there are 20
  // partitions. So set this startTime can read half elements.
  int startTime = numElements / 20 / 2;
  int maxNumRecords = numElements / 2;

  PCollection<Long> input =
      p.apply(
              mkKafkaReadTransform(numElements, maxNumRecords, new ValueAsTimestampFn())
                  .withStartReadTime(new Instant(startTime))
                  .withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, maxNumRecords, maxNumRecords, maxNumRecords, numElements - 1);
  p.run();
}
Example #19
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testUnboundedSourceStartReadTimeException() {
  assumeTrue(new ConsumerSpEL().hasOffsetsForTimes());

  noMessagesException.expect(RuntimeException.class);

  int numElements = 1000;
  // In this MockConsumer, we let the elements of the time and offset equal and there are 20
  // partitions. So set this startTime can not read any element.
  int startTime = numElements / 20;

  p.apply(
          mkKafkaReadTransform(numElements, numElements, new ValueAsTimestampFn())
              .withStartReadTime(new Instant(startTime))
              .withoutMetadata())
      .apply(Values.create());

  p.run();
}
Example #20
Source File: HadoopFormatIOCassandraTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Test to read data from embedded Cassandra instance and verify whether data is read
 * successfully.
 */
@Test
public void testHIFReadForCassandra() {
  // Expected hashcode is evaluated during insertion time one time and hardcoded here.
  String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
  Configuration conf = getConfiguration();
  PCollection<KV<Long, String>> cassandraData =
      p.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  // Verify the count of data retrieved from Cassandra matches expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
      .isEqualTo(TEST_DATA_ROW_COUNT);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  p.run().waitUntilFinish();
}
Example #21
Source File: HadoopFormatIOCassandraIT.java From beam with Apache License 2.0 | 5 votes |
/**
 * This test reads data from the Cassandra instance based on query and verifies if data is read
 * successfully.
 */
@Test
public void testHIFReadForCassandraQuery() {
  String expectedHashCode = "7bead6d6385c5f4dd0524720cd320b49";
  Long expectedNumRows = 1L;
  Configuration conf = getConfiguration(options);
  conf.set(
      "cassandra.input.cql",
      "select * from "
          + CASSANDRA_KEYSPACE
          + "."
          + CASSANDRA_TABLE
          + " where token(y_id) > ? and token(y_id) <= ? "
          + "and field0 = 'user48:field0:431531'");
  PCollection<KV<Long, String>> cassandraData =
      pipeline.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
      .isEqualTo(expectedNumRows);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #22
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testUnboundedSourceSplits() throws Exception {

  int numElements = 1000;
  int numSplits = 10;

  // Coders must be specified explicitly here due to the way the transform
  // is used in the test.
  UnboundedSource<KafkaRecord<Integer, Long>, ?> initial =
      mkKafkaReadTransform(numElements, null)
          .withKeyDeserializerAndCoder(IntegerDeserializer.class, BigEndianIntegerCoder.of())
          .withValueDeserializerAndCoder(LongDeserializer.class, BigEndianLongCoder.of())
          .makeSource();

  List<? extends UnboundedSource<KafkaRecord<Integer, Long>, ?>> splits =
      initial.split(numSplits, p.getOptions());
  assertEquals("Expected exact splitting", numSplits, splits.size());

  long elementsPerSplit = numElements / numSplits;
  assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits);
  PCollectionList<Long> pcollections = PCollectionList.empty(p);
  for (int i = 0; i < splits.size(); ++i) {
    pcollections =
        pcollections.and(
            p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit))
                .apply("Remove Metadata " + i, ParDo.of(new RemoveKafkaMetadata<>()))
                .apply("collection " + i, Values.create()));
  }
  PCollection<Long> input = pcollections.apply(Flatten.pCollections());

  addCountingAsserts(input, numElements);
  p.run();
}
Example #23
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testUnboundedSourceCustomTimestamps() {
  // The custom timestamps is set to customTimestampStartMillis + value.
  // Tests basic functionality of custom timestamps.
  final int numElements = 1000;
  final long customTimestampStartMillis = 80000L;

  PCollection<Long> input =
      p.apply(
              mkKafkaReadTransform(numElements, null)
                  .withTimestampPolicyFactory(
                      (tp, prevWatermark) ->
                          new CustomTimestampPolicyWithLimitedDelay<Integer, Long>(
                              (record ->
                                  new Instant(
                                      TimeUnit.SECONDS.toMillis(record.getKV().getValue())
                                          + customTimestampStartMillis)),
                              Duration.ZERO,
                              prevWatermark))
                  .withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, numElements);

  PCollection<Long> diffs =
      input
          .apply(
              MapElements.into(TypeDescriptors.longs())
                  .via(t -> TimeUnit.SECONDS.toMillis(t) + customTimestampStartMillis))
          .apply("TimestampDiff", ParDo.of(new ElementValueDiff()))
          .apply("DistinctTimestamps", Distinct.create());

  // This assert also confirms that diff only has one unique value.
  PAssert.thatSingleton(diffs).isEqualTo(0L);

  p.run();
}
Example #24
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testUnboundedSource() {
  int numElements = 1000;

  PCollection<Long> input =
      p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, numElements);
  p.run();
}
Example #25
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testValuesSink() throws Exception {
  // similar to testSink(), but use values()' interface.

  int numElements = 1000;

  try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) {

    ProducerSendCompletionThread completionThread =
        new ProducerSendCompletionThread(producerWrapper.mockProducer).start();

    String topic = "test";

    p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
        .apply(Values.create()) // there are no keys
        .apply(
            KafkaIO.<Integer, Long>write()
                .withBootstrapServers("none")
                .withTopic(topic)
                .withValueSerializer(LongSerializer.class)
                .withProducerFactoryFn(new ProducerFactoryFn(producerWrapper.producerKey))
                .values());

    p.run();

    completionThread.shutdown();

    verifyProducerRecords(producerWrapper.mockProducer, topic, numElements, true, false);
  }
}
Example #26
Source File: HadoopFormatIOElasticIT.java From beam with Apache License 2.0 | 5 votes |
/**
 * This test reads data from the Elasticsearch instance based on a query and verifies if data is
 * read successfully.
 */
@Test
public void testHifIOWithElasticQuery() {
  String expectedHashCode = "d7a7e4e42c2ca7b83ef7c1ad1ebce000";
  Long expectedRecordsCount = 1L;
  Configuration conf = getConfiguration(options);
  String query =
      "{"
          + " \"query\": {"
          + " \"match\" : {"
          + " \"Title\" : {"
          + " \"query\" : \"Title9\","
          + " \"type\" : \"boolean\""
          + " }"
          + " }"
          + " }"
          + "}";
  conf.set(ConfigurationOptions.ES_QUERY, query);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo(expectedRecordsCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example #27
Source File: HadoopFormatIOCassandraTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Test to read data from embedded Cassandra instance based on query and verify whether data is
 * read successfully.
 */
@Test
public void testHIFReadForCassandraQuery() {
  Long expectedCount = 1L;
  String expectedChecksum = "f11caabc7a9fc170e22b41218749166c";
  Configuration conf = getConfiguration();
  conf.set(
      "cassandra.input.cql",
      "select * from "
          + CASSANDRA_KEYSPACE
          + "."
          + CASSANDRA_TABLE
          + " where token(id) > ? and token(id) <= ? and scientist='Faraday1' allow filtering");
  PCollection<KV<Long, String>> cassandraData =
      p.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  // Verify the count of data retrieved from Cassandra matches expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally())).isEqualTo(expectedCount);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedChecksum);
  p.run().waitUntilFinish();
}
Example #28
Source File: SplittableParDoNaiveBounded.java From beam with Apache License 2.0 | 5 votes |
@Override
public PCollectionTuple expand(PCollection<KV<byte[], KV<InputT, RestrictionT>>> input) {
  return input
      .apply("Drop key", Values.create())
      .apply("Reshuffle", Reshuffle.of())
      .apply(
          "NaiveProcess",
          ParDo.of(
                  new NaiveProcessFn<
                      InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>(
                      original.getFn()))
              .withSideInputs(original.getSideInputs())
              .withOutputTags(original.getMainOutputTag(), original.getAdditionalOutputTags()));
}
Example #29
Source File: KafkaIOTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testUnreachableKafkaBrokers() {
  // Expect an exception when the Kafka brokers are not reachable on the workers.
  // We specify partitions explicitly so that splitting does not involve server interaction.
  // Set request timeout to 10ms so that test does not take long.

  thrown.expect(Exception.class);
  thrown.expectMessage("Reader-0: Timeout while initializing partition 'test-0'");

  int numElements = 1000;
  PCollection<Long> input =
      p.apply(
              KafkaIO.<Integer, Long>read()
                  .withBootstrapServers("8.8.8.8:9092") // Google public DNS ip.
                  .withTopicPartitions(ImmutableList.of(new TopicPartition("test", 0)))
                  .withKeyDeserializer(IntegerDeserializer.class)
                  .withValueDeserializer(LongDeserializer.class)
                  .withConsumerConfigUpdates(
                      ImmutableMap.of(
                          ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 5,
                          ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 8,
                          ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, 8,
                          "default.api.timeout.ms", 10))
                  .withMaxNumRecords(10)
                  .withoutMetadata())
          .apply(Values.create());

  addCountingAsserts(input, numElements);
  p.run();
}