org.apache.kafka.streams.kstream.KGroupedStream Java Examples
The following examples show how to use
org.apache.kafka.streams.kstream.KGroupedStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StreamingWordCount.java From Kafka-Streams-Real-time-Stream-Processing with The Unlicense | 7 votes |
/**
 * Builds and starts the word-count topology: values read from the
 * "streaming-word-count" topic are lower-cased, split on single spaces,
 * grouped by word and counted; every count update is logged.
 *
 * @param args command-line arguments (not read)
 */
public static void main(final String[] args) {
    // The same String serde class is used as the default for keys and values.
    final String stringSerdeClassName = Serdes.String().getClass().getName();

    final Properties config = new Properties();
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "StreamingWordCount");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.STATE_DIR_CONFIG, "state-store");
    config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, stringSerdeClassName);
    config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, stringSerdeClassName);

    logger.info("Start Reading Messages");

    final StreamsBuilder topologyBuilder = new StreamsBuilder();
    final KStream<String, String> lines = topologyBuilder.stream("streaming-word-count");

    // One record per word, then re-key each record by the word itself.
    final KStream<String, String> words =
        lines.flatMapValues(line -> Arrays.asList(line.toLowerCase().split(" ")));
    final KGroupedStream<String, String> wordsGroupedByWord =
        words.groupBy((ignoredKey, word) -> word);
    final KTable<String, Long> wordCounts = wordsGroupedByWord.count();

    wordCounts.toStream().peek(
        (word, count) -> logger.info("Key = " + word + " Value = " + count.toString())
    );

    final KafkaStreams streams = new KafkaStreams(topologyBuilder.build(), config);
    streams.start();

    // Close the streams client when the JVM shuts down.
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
Example #2
Source File: HoppingWindowExpressionTest.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 6 votes |
/**
 * Expects a hopping window of 10 seconds advancing by 4 milliseconds to call
 * {@code windowedBy(TimeWindows.of(10000L).advanceBy(4L))} on the grouped stream
 * and then {@code aggregate} with the very same initializer, aggregator and store.
 */
@Test
public void shouldCreateHoppingWindowAggregate() {
    // Mocked collaborators.
    final KGroupedStream groupedStream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream windowedStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator udafAggregator = EasyMock.createNiceMock(UdafAggregator.class);

    // Arguments handed to the expression under test.
    final Initializer zeroInitializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> materialized =
        Materialized.as("store");
    final HoppingWindowExpression hoppingWindow =
        new HoppingWindowExpression(10, TimeUnit.SECONDS, 4, TimeUnit.MILLISECONDS);

    // Record the expected interactions.
    EasyMock.expect(groupedStream.windowedBy(TimeWindows.of(10000L).advanceBy(4L)))
        .andReturn(windowedStream);
    EasyMock.expect(
            windowedStream.aggregate(same(zeroInitializer), same(udafAggregator), same(materialized)))
        .andReturn(null);
    EasyMock.replay(groupedStream, windowedStream);

    hoppingWindow.applyAggregate(groupedStream, zeroInitializer, udafAggregator, materialized);

    EasyMock.verify(groupedStream, windowedStream);
}
Example #3
Source File: TumblingWindowExpressionTest.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 6 votes |
/**
 * Expects a 10-second tumbling window to call
 * {@code windowedBy(TimeWindows.of(10000L))} on the grouped stream and then
 * {@code aggregate} with the very same initializer, aggregator and store.
 */
@Test
public void shouldCreateTumblingWindowAggregate() {
    // Mocked collaborators.
    final KGroupedStream groupedStream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream windowedStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator udafAggregator = EasyMock.createNiceMock(UdafAggregator.class);

    // Arguments handed to the expression under test.
    final Initializer zeroInitializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> materialized =
        Materialized.as("store");
    final TumblingWindowExpression tumblingWindow =
        new TumblingWindowExpression(10, TimeUnit.SECONDS);

    // Record the expected interactions.
    EasyMock.expect(groupedStream.windowedBy(TimeWindows.of(10000L)))
        .andReturn(windowedStream);
    EasyMock.expect(
            windowedStream.aggregate(same(zeroInitializer), same(udafAggregator), same(materialized)))
        .andReturn(null);
    EasyMock.replay(groupedStream, windowedStream);

    tumblingWindow.applyAggregate(groupedStream, zeroInitializer, udafAggregator, materialized);

    EasyMock.verify(groupedStream, windowedStream);
}
Example #4
Source File: RunningAverage.java From kafka-tutorials with Apache License 2.0 | 6 votes |
protected static KTable<Long, Double> getRatingAverageTable(KStream<Long, Rating> ratings, String avgRatingsTopicName, SpecificAvroSerde<CountAndSum> countAndSumSerde) { // Grouping Ratings KGroupedStream<Long, Double> ratingsById = ratings .map((key, rating) -> new KeyValue<>(rating.getMovieId(), rating.getRating())) .groupByKey(with(Long(), Double())); final KTable<Long, CountAndSum> ratingCountAndSum = ratingsById.aggregate(() -> new CountAndSum(0L, 0.0), (key, value, aggregate) -> { aggregate.setCount(aggregate.getCount() + 1); aggregate.setSum(aggregate.getSum() + value); return aggregate; }, Materialized.with(Long(), countAndSumSerde)); final KTable<Long, Double> ratingAverage = ratingCountAndSum.mapValues(value -> value.getSum() / value.getCount(), Materialized.as("average-ratings")); // persist the result in topic ratingAverage.toStream().to(avgRatingsTopicName); return ratingAverage; }
Example #5
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "median" metric over seven inputs whose "duration" values are
 * 1, 2, 3, 4, 5, 6 and 100, and expects exactly one result equal to 4.0029296875.
 */
@Test
public void shouldComputeMedian() {
    final String destTopic = "median-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 2}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 3}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 4}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 5}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 6}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 100}")
    );

    // Processor specialised for "median" on the "duration" field, 1-second tumbling window.
    final GenericMetricProcessor medianProcessor =
        new GenericMetricProcessor(buildProcessMetric("median", destTopic), "median-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("median");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, medianProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("median");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(4.0029296875);
}
Example #6
Source File: HoppingWindowExpression.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Windows the grouped stream with a hopping time window built from this
 * expression's size and advance settings, then aggregates it.
 *
 * @param groupedStream stream to window
 * @param initializer   passed through to {@code aggregate}
 * @param aggregator    passed through to {@code aggregate}
 * @param materialized  passed through to {@code aggregate}
 * @return whatever the windowed stream's {@code aggregate} call returns
 */
@SuppressWarnings("unchecked")
@Override
public KTable applyAggregate(
    KGroupedStream groupedStream,
    Initializer initializer,
    UdafAggregator aggregator,
    Materialized<String, GenericRow, ?> materialized
) {
  // Size first, then the hop interval, both converted to milliseconds.
  final TimeWindows sizedWindows = TimeWindows.of(sizeUnit.toMillis(size));
  final TimeWindows hoppingWindows = sizedWindows.advanceBy(advanceByUnit.toMillis(advanceBy));

  return groupedStream
      .windowedBy(hoppingWindows)
      .aggregate(initializer, aggregator, materialized);
}
Example #7
Source File: TumblingWindowExpression.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Windows the grouped stream with a time window of this expression's size
 * (converted to milliseconds), then aggregates it.
 *
 * @param groupedStream stream to window
 * @param initializer   passed through to {@code aggregate}
 * @param aggregator    passed through to {@code aggregate}
 * @param materialized  passed through to {@code aggregate}
 * @return whatever the windowed stream's {@code aggregate} call returns
 */
@SuppressWarnings("unchecked")
@Override
public KTable applyAggregate(final KGroupedStream groupedStream,
                             final Initializer initializer,
                             final UdafAggregator aggregator,
                             final Materialized<String, GenericRow, ?> materialized) {
  final TimeWindows tumblingWindows = TimeWindows.of(sizeUnit.toMillis(size));

  return groupedStream
      .windowedBy(tumblingWindows)
      .aggregate(initializer, aggregator, materialized);
}
Example #8
Source File: SessionWindowExpression.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Windows the grouped stream with a session window whose inactivity gap is
 * this expression's {@code gap} (converted to milliseconds), then aggregates
 * it using the aggregator's own merger.
 *
 * @param groupedStream stream to window
 * @param initializer   passed through to {@code aggregate}
 * @param aggregator    passed through to {@code aggregate}; also supplies the merger
 * @param materialized  passed through to {@code aggregate}
 * @return whatever the windowed stream's {@code aggregate} call returns
 */
@SuppressWarnings("unchecked")
@Override
public KTable applyAggregate(final KGroupedStream groupedStream,
                             final Initializer initializer,
                             final UdafAggregator aggregator,
                             final Materialized<String, GenericRow, ?> materialized) {
  final SessionWindows sessionWindows = SessionWindows.with(sizeUnit.toMillis(gap));

  return groupedStream
      .windowedBy(sessionWindows)
      .aggregate(initializer, aggregator, aggregator.getMerger(), materialized);
}
Example #9
Source File: SchemaKGroupedStream.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 5 votes |
/**
 * Stores each argument in the field of the same name. No argument is
 * validated or copied.
 *
 * @param schema               value for the {@code schema} field
 * @param kgroupedStream       value for the {@code kgroupedStream} field
 * @param keyField             value for the {@code keyField} field
 * @param sourceSchemaKStreams value for the {@code sourceSchemaKStreams} field
 * @param functionRegistry     value for the {@code functionRegistry} field
 * @param schemaRegistryClient value for the {@code schemaRegistryClient} field
 */
SchemaKGroupedStream(
    final Schema schema,
    final KGroupedStream kgroupedStream,
    final Field keyField,
    final List<SchemaKStream> sourceSchemaKStreams,
    final FunctionRegistry functionRegistry,
    final SchemaRegistryClient schemaRegistryClient
) {
  // Stream and its lineage.
  this.kgroupedStream = kgroupedStream;
  this.sourceSchemaKStreams = sourceSchemaKStreams;

  // Schema description.
  this.schema = schema;
  this.keyField = keyField;

  // Shared registries.
  this.functionRegistry = functionRegistry;
  this.schemaRegistryClient = schemaRegistryClient;
}
Example #10
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "sum" metric over two inputs with "duration" 1 and 9 and expects
 * exactly one result equal to 10.
 */
@Test
public void shouldComputeSum() {
    final String destTopic = "sum-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 9}")
    );

    // Processor specialised for "sum" on the "duration" field, 1-second tumbling window.
    final GenericMetricProcessor sumProcessor =
        new GenericMetricProcessor(buildProcessMetric("sum", destTopic), "sum-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("sum");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, sumProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("sum");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(10);
}
Example #11
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "avg" metric over two inputs with "duration" 1 and 9 and expects
 * exactly one result equal to 5.
 */
@Test
public void shouldComputeAvg() {
    final String destTopic = "avg-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 9}")
    );

    // Processor specialised for "avg" on the "duration" field, 1-second tumbling window.
    final GenericMetricProcessor avgProcessor =
        new GenericMetricProcessor(buildProcessMetric("avg", destTopic), "avg-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("avg");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, avgProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("avg");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(5);
}
Example #12
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "max" metric over two inputs with "duration" 1 and 10 and expects
 * exactly one result equal to 10.
 */
@Test
public void shouldComputeMax() {
    final String destTopic = "max-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 10}")
    );

    // Processor specialised for "max" on the "duration" field, 1-second tumbling window.
    final GenericMetricProcessor maxProcessor =
        new GenericMetricProcessor(buildProcessMetric("max", destTopic), "max-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("max");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, maxProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("max");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(10);
}
Example #13
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "min" metric over two inputs with "duration" 1 and 10 and expects
 * exactly one result equal to 1.
 */
@Test
public void shouldComputeMin() {
    final String destTopic = "min-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 10}")
    );

    // Processor specialised for "min" on the "duration" field, 1-second tumbling window.
    final GenericMetricProcessor minProcessor =
        new GenericMetricProcessor(buildProcessMetric("min", destTopic), "min-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("min");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, minProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("min");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(1);
}
Example #14
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "count-distinct" metric on the "type" field of six inputs that
 * carry three different types and expects exactly one result equal to 3.
 */
@Test
public void shouldComputeCountDistinct() {
    final String destTopic = "count-distinct-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 10}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 11}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 12}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 13}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"anotherone\",\"duration\": 13}")
    );

    // Processor specialised for "count-distinct" on the "type" field, 1-second tumbling window.
    final GenericMetricProcessor countDistinctProcessor =
        new GenericMetricProcessor(buildProcessMetric("count-distinct", destTopic), "count-distinct-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("count-distinct");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("type");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, countDistinctProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("count-distinct");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(3);
}
Example #15
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 5 votes |
/**
 * Runs the "count" metric over two inputs and expects exactly one result
 * equal to 2.
 */
@Test
public void shouldComputeCount() {
    final String destTopic = "count-dest";
    final List<JsonNode> samples = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 10}")
    );

    // Processor specialised for "count" on the "duration" field, 1-second tumbling window.
    final GenericMetricProcessor countProcessor =
        new GenericMetricProcessor(buildProcessMetric("count", destTopic), "count-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("count");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    final List<KafkaUnit.Message<Keys, MetricResult>> published =
        executeMetricStream(samples, countProcessor, destTopic);

    assertThat(published).hasSize(1);
    final KafkaUnit.Message<Keys, MetricResult> first = published.get(0);
    assertThat(first.getKey().getRuleName()).isEqualTo("count");
    assertThat(first.getKey().getRuleDSL()).isNotBlank();
    assertThat(first.getKey().getProject()).isEqualTo("myproject");
    assertThat(first.getValue().getResult()).isEqualTo(2);
}
Example #16
Source File: ScsApplication.java From spring_io_2019 with Apache License 2.0 | 5 votes |
/**
 * Computes, per key, the average of the incoming ratings by joining a
 * per-key sum with a per-key count.
 *
 * @param ratings stream bound to {@code Bindings.RATINGS}
 * @return change stream of the joined (sum / count) table, sent to
 *         {@code Bindings.AVG_RATINGS}
 */
@StreamListener
@SendTo(Bindings.AVG_RATINGS)
KStream<Long, Double> averageRatingsFor(@Input(Bindings.RATINGS) KStream<Long, Rating> ratings) {
  // Keep only the numeric rating and group by the existing key.
  KGroupedStream<Long, Double> ratingValuesByKey =
      ratings.mapValues(Rating::getRating).groupByKey();

  // Number of ratings seen per key.
  KTable<Long, Long> ratingCounts = ratingValuesByKey.count();

  // Sum of ratings per key.
  KTable<Long, Double> ratingSums = ratingValuesByKey.reduce(
      Double::sum,
      Materialized.with(Serdes.Long(), Serdes.Double()));

  // average = sum / count
  KTable<Long, Double> ratingAverages = ratingSums.join(
      ratingCounts,
      (sum, numberOfRatings) -> sum / numberOfRatings,
      Materialized.with(Serdes.Long(), Serdes.Double()));

  return ratingAverages.toStream();
}
Example #17
Source File: CogroupingStreams.java From kafka-tutorials with Apache License 2.0 | 5 votes |
/**
 * Builds a topology that co-groups login events from three input topics by
 * key, aggregates them into a {@code LoginRollup}, and writes the result to
 * the output topic.
 *
 * @param envProps properties supplying the topic names
 *                 ("app-one.topic.name", "app-two.topic.name",
 *                 "app-three.topic.name", "output.topic.name") and passed on
 *                 to the serde factory helpers
 * @return the built topology
 */
public Topology buildTopology(Properties envProps) {
    final StreamsBuilder builder = new StreamsBuilder();

    // Topic names.
    final String firstAppTopic = envProps.getProperty("app-one.topic.name");
    final String secondAppTopic = envProps.getProperty("app-two.topic.name");
    final String thirdAppTopic = envProps.getProperty("app-three.topic.name");
    final String rollupTopic = envProps.getProperty("output.topic.name");

    // Serdes.
    final Serde<String> stringSerde = getPrimitiveAvroSerde(envProps, true);
    final Serde<LoginEvent> loginEventSerde = getSpecificAvroSerde(envProps);
    final Serde<LoginRollup> loginRollupSerde = getSpecificAvroSerde(envProps);

    // One source stream per application.
    final KStream<String, LoginEvent> firstAppLogins =
        builder.stream(firstAppTopic, Consumed.with(stringSerde, loginEventSerde));
    final KStream<String, LoginEvent> secondAppLogins =
        builder.stream(secondAppTopic, Consumed.with(stringSerde, loginEventSerde));
    final KStream<String, LoginEvent> thirdAppLogins =
        builder.stream(thirdAppTopic, Consumed.with(stringSerde, loginEventSerde));

    // The same aggregator is applied to all three grouped streams.
    final Aggregator<String, LoginEvent, LoginRollup> loginAggregator = new LoginAggregator();

    final KGroupedStream<String, LoginEvent> firstAppGrouped = firstAppLogins.groupByKey();
    final KGroupedStream<String, LoginEvent> secondAppGrouped = secondAppLogins.groupByKey();
    final KGroupedStream<String, LoginEvent> thirdAppGrouped = thirdAppLogins.groupByKey();

    firstAppGrouped
        .cogroup(loginAggregator)
        .cogroup(secondAppGrouped, loginAggregator)
        .cogroup(thirdAppGrouped, loginAggregator)
        .aggregate(
            () -> new LoginRollup(new HashMap<>()),
            Materialized.with(Serdes.String(), loginRollupSerde))
        .toStream()
        .to(rollupTopic, Produced.with(stringSerde, loginRollupSerde));

    return builder.build();
}
Example #18
Source File: GenericMetricProcessorIT.java From SkaETL with Apache License 2.0 | 4 votes |
/**
 * Runs the "min" metric over two batches of two inputs each and expects two
 * results: 2 for the first and 1 for the second. Currently ignored ("slow").
 */
@Test
@Ignore("slow")
public void shouldComputeMinWithTumblingWindow() {
    List<JsonNode> inputStartTime = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 2}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 10}")
    );

    // NOTE(review): this pause happens while the input lists are being built, before
    // anything is handed to executeMetricStream — confirm it really separates the two
    // batches into different one-second windows.
    try {
        Thread.sleep(1000);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can still observe it;
        // the original handler only printed the stack trace and dropped the flag.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }

    List<JsonNode> inputsDelayed = Arrays.asList(
        toJsonNode("{\"project\":\"myproject\",\"type\":\"something\",\"duration\": 1}"),
        toJsonNode("{\"project\":\"myproject\",\"type\":\"somethingelse\",\"duration\": 8}")
    );

    List<JsonNode> input = new ArrayList<>();
    input.addAll(inputStartTime);
    input.addAll(inputsDelayed);

    String destTopic = "min-dest";
    // Processor specialised for "min" on the "duration" field, 1-second tumbling window.
    GenericMetricProcessor minDuration =
        new GenericMetricProcessor(buildProcessMetric("min", destTopic), "dsl", "min-src", functionRegistry, udafRegistry) {
            @Override
            protected AggregateFunction aggInitializer() {
                return aggFunction("min");
            }

            @Override
            protected KTable<Windowed<Keys>, Double> aggregate(KGroupedStream<Keys, JsonNode> kGroupedStream) {
                return aggregateTumblingWindow(kGroupedStream, 1, TimeUnit.SECONDS);
            }

            @Override
            protected JsonNode mapValues(JsonNode value) {
                return value.path("duration");
            }
        };

    List<KafkaUnit.Message<Keys, MetricResult>> resultInDestTopic = executeMetricStream(input, minDuration, destTopic);
    assertThat(resultInDestTopic).hasSize(2);

    // First result.
    assertThat(resultInDestTopic.get(0).getKey().getRuleName()).isEqualTo("min");
    assertThat(resultInDestTopic.get(0).getKey().getRuleDSL()).isNotBlank();
    assertThat(resultInDestTopic.get(0).getKey().getProject()).isEqualTo("myproject");
    assertThat(resultInDestTopic.get(0).getValue().getResult()).isEqualTo(2);

    // Second result.
    assertThat(resultInDestTopic.get(1).getKey().getRuleName()).isEqualTo("min");
    assertThat(resultInDestTopic.get(1).getKey().getRuleDSL()).isNotBlank();
    assertThat(resultInDestTopic.get(1).getKey().getProject()).isEqualTo("myproject");
    assertThat(resultInDestTopic.get(1).getValue().getResult()).isEqualTo(1);
}
Example #19
Source File: KsqlWindowExpression.java From ksql-fork-with-deep-learning-function with Apache License 2.0 | 4 votes |
/**
 * Applies this window expression to a grouped stream and aggregates the result.
 *
 * <p>Abstract: the windowing itself is defined by each concrete subclass.
 * NOTE(review): presumably every implementation windows {@code groupedStream}
 * and then calls {@code aggregate} with the remaining arguments — confirm
 * against the subclasses.
 *
 * @param groupedStream the grouped stream to aggregate
 * @param initializer   initializer for the aggregation
 * @param aggregator    aggregator for the aggregation
 * @param materialized  materialization settings for the aggregation
 * @return the table produced by the aggregation
 */
public abstract KTable applyAggregate(final KGroupedStream groupedStream, final Initializer initializer, final UdafAggregator aggregator, final Materialized<String, GenericRow, ?> materialized);