org.apache.kafka.streams.kstream.KStream Java Examples
The following examples show how to use
org.apache.kafka.streams.kstream.KStream.
Each example notes the original project, source file, and license above the listing.
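For orientation, the core pattern shared by most of the examples below is: build a topology with a StreamsBuilder, obtain a KStream from an input topic, transform it, and write it out. A minimal sketch (the topic names and the transformation are illustrative, not taken from any example below):

import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;

public class PassThroughApp {
    public static void main(String[] args) {
        Properties config = new Properties();
        config.put(StreamsConfig.APPLICATION_ID_CONFIG, "pass-through-app");
        config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        KStream<String, String> input = builder.stream("input-topic");
        input.mapValues(value -> value.toUpperCase()) // any per-record transformation
             .to("output-topic");

        KafkaStreams streams = new KafkaStreams(builder.build(), config);
        streams.start();
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
}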
Example #1
Source File: UserClicksPerMinute.java From fluent-kafka-streams-tests with MIT License
public Topology getTopology() {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Integer, ClickEvent> clickEvents = builder.stream(this.inputTopic);

    final KTable<Windowed<Integer>, Long> counts = clickEvents
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(1)))
            .count();

    counts.toStream()
            .map((key, value) -> KeyValue.pair(
                    key.key(),
                    new ClickOutput(key.key(), value, key.window().start())))
            .to(this.outputTopic,
                    Produced.with(Serdes.Integer(), new JsonSerde<>(ClickOutput.class)));

    return builder.build();
}
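Note that windowing changes the key type: after windowedBy(...).count() the table is keyed by Windowed<Integer>, so the final map(...) unwraps the original key and the window start time before writing to the output topic.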
Example #2
Source File: DeserializtionErrorHandlerByBinderTests.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Test
@Ignore
public void test() {
    Map<String, Object> senderProps = KafkaTestUtils.producerProps(embeddedKafka);
    DefaultKafkaProducerFactory<Integer, String> pf = new DefaultKafkaProducerFactory<>(senderProps);
    KafkaTemplate<Integer, String> template = new KafkaTemplate<>(pf, true);
    template.setDefaultTopic("foos");
    template.sendDefault(1, 7, "hello");

    Map<String, Object> consumerProps = KafkaTestUtils.consumerProps("foobar", "false", embeddedKafka);
    consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    DefaultKafkaConsumerFactory<String, String> cf = new DefaultKafkaConsumerFactory<>(consumerProps);

    Consumer<String, String> consumer1 = cf.createConsumer();
    embeddedKafka.consumeFromAnEmbeddedTopic(consumer1, "error.foos.foobar-group");

    ConsumerRecord<String, String> cr = KafkaTestUtils.getSingleRecord(consumer1, "error.foos.foobar-group");
    assertThat(cr.value()).isEqualTo("hello");
    assertThat(cr.partition()).isEqualTo(0);

    // Ensuring that the deserialization was indeed done by the binder
    verify(conversionDelegate).deserializeOnInbound(any(Class.class), any(KStream.class));
}
Example #3
Source File: ScsApplication.java From spring_io_2019 with Apache License 2.0
@StreamListener
@SendTo(Bindings.RATED_MOVIES)
KStream<Long, RatedMovie> rateMoviesFor(@Input(Bindings.AVG_TABLE) KTable<Long, Double> ratings,
                                        @Input(Bindings.MOVIES) KTable<Long, Movie> movies) {
    ValueJoiner<Movie, Double, RatedMovie> joiner = (movie, rating) ->
            new RatedMovie(movie.getMovieId(), movie.getReleaseYear(), movie.getTitle(), rating);

    // First join: materializes the result into a queryable state store.
    movies.join(ratings, joiner,
            Materialized
                    .<Long, RatedMovie, KeyValueStore<Bytes, byte[]>>as(Bindings.RATED_MOVIES_STORE)
                    .withKeySerde(Serdes.Long())
                    .withValueSerde(new JsonSerde<>(RatedMovie.class)));

    // Second join: produces the stream that is sent to the outbound binding.
    return movies.join(ratings, joiner).toStream();
}
Example #4
Source File: StreamsStarterApp.java From kafka-streams-machine-learning-examples with Apache License 2.0
public static void main(String[] args) {
    Properties config = new Properties();
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-starter-app");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, String> kStream = builder.stream("streams-file-input");
    // do stuff
    kStream.to("streams-wordcount-output");

    KafkaStreams streams = new KafkaStreams(builder.build(), config);
    streams.cleanUp(); // only do this in dev - not in prod
    streams.start();

    // print the local thread metadata
    System.out.println(streams.localThreadsMetadata().toString());

    // shutdown hook to correctly close the streams application
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
Example #5
Source File: CountVersionApplication.java From spring-cloud-stream-samples with Apache License 2.0
@Bean
public Function<KStream<Object, Sensor>, KStream<String, Long>> process() {
    Map<String, Object> configs = new HashMap<>();
    configs.put("valueClass", Sensor.class);
    configs.put("contentType", "application/*+avro");
    customSerde.configure(configs, false);

    return input -> input
            .map((key, value) -> {
                String newKey = "v1";
                if (value.getId().toString().endsWith("v2")) {
                    newKey = "v2";
                }
                return new KeyValue<>(newKey, value);
            })
            .groupByKey(Grouped.with(Serdes.String(), customSerde))
            .count(Materialized.as(STORE_NAME))
            .toStream();
}
Example #6
Source File: RunningAverage.java From kafka-tutorials with Apache License 2.0
protected static KTable<Long, Double> getRatingAverageTable(KStream<Long, Rating> ratings,
                                                            String avgRatingsTopicName,
                                                            SpecificAvroSerde<CountAndSum> countAndSumSerde) {
    // Grouping Ratings (with(), Long() and Double() are static imports of
    // Grouped.with, Serdes.Long and Serdes.Double)
    KGroupedStream<Long, Double> ratingsById = ratings
            .map((key, rating) -> new KeyValue<>(rating.getMovieId(), rating.getRating()))
            .groupByKey(with(Long(), Double()));

    final KTable<Long, CountAndSum> ratingCountAndSum = ratingsById.aggregate(
            () -> new CountAndSum(0L, 0.0),
            (key, value, aggregate) -> {
                aggregate.setCount(aggregate.getCount() + 1);
                aggregate.setSum(aggregate.getSum() + value);
                return aggregate;
            },
            Materialized.with(Long(), countAndSumSerde));

    final KTable<Long, Double> ratingAverage = ratingCountAndSum.mapValues(
            value -> value.getSum() / value.getCount(),
            Materialized.as("average-ratings"));

    // persist the result in topic
    ratingAverage.toStream().to(avgRatingsTopicName);
    return ratingAverage;
}
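A note on the pattern: averages cannot be combined incrementally the way sums can, so the aggregation accumulates a count-and-sum pair per movie id and derives the average from it in a separate mapValues step.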
Example #7
Source File: KStreamBoundElementFactory.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Override
public Object invoke(MethodInvocation methodInvocation) throws Throwable {
    if (methodInvocation.getMethod().getDeclaringClass().equals(KStream.class)) {
        Assert.notNull(this.delegate,
                "Trying to prepareConsumerBinding " + methodInvocation.getMethod()
                        + " but no delegate has been set.");
        return methodInvocation.getMethod().invoke(this.delegate,
                methodInvocation.getArguments());
    }
    else if (methodInvocation.getMethod().getDeclaringClass()
            .equals(KStreamWrapper.class)) {
        return methodInvocation.getMethod().invoke(this,
                methodInvocation.getArguments());
    }
    else {
        throw new IllegalStateException("Only KStream method invocations are permitted");
    }
}
Example #8
Source File: KStreamBoundElementFactory.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Override
public KStream createInput(String name) {
    BindingProperties bindingProperties = this.bindingServiceProperties.getBindingProperties(name);
    ConsumerProperties consumerProperties = bindingProperties.getConsumer();
    if (consumerProperties == null) {
        consumerProperties = this.bindingServiceProperties.getConsumerProperties(name);
        consumerProperties.setUseNativeDecoding(true);
    }
    else {
        if (!encodingDecodingBindAdviceHandler.isDecodingSettingProvided()) {
            consumerProperties.setUseNativeDecoding(true);
        }
    }
    // Always set multiplex to true in the kafka streams binder
    consumerProperties.setMultiplex(true);
    return createProxyForKStream(name);
}
Example #9
Source File: KafkaStreamsNativeEncodingDecodingTests.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Test
public void test() {
    Map<String, Object> senderProps = KafkaTestUtils.producerProps(embeddedKafka);
    DefaultKafkaProducerFactory<Integer, String> pf = new DefaultKafkaProducerFactory<>(senderProps);
    KafkaTemplate<Integer, String> template = new KafkaTemplate<>(pf, true);
    template.setDefaultTopic("decode-words");
    template.sendDefault("foobar");

    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    System.out.println("Starting: ");
    ConsumerRecord<String, String> cr = KafkaTestUtils.getSingleRecord(consumer, "decode-counts");
    stopWatch.stop();
    System.out.println("Total time: " + stopWatch.getTotalTimeSeconds());

    assertThat(cr.value().equals("Count for foobar : 1")).isTrue();
    verify(conversionDelegate).serializeOnOutbound(any(KStream.class));
    verify(conversionDelegate).deserializeOnInbound(any(Class.class), any(KStream.class));
}
Example #10
Source File: WordCount.java From fluent-kafka-streams-tests with MIT License
public Topology getTopology() {
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();

    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> textLines = builder.stream(this.inputTopic);

    final Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
    final KTable<String, Long> wordCounts = textLines
            .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
            .groupBy((key, word) -> word)
            .count();

    wordCounts.toStream().to(this.outputTopic, Produced.with(stringSerde, longSerde));
    return builder.build();
}
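Because getTopology() returns a plain Topology, it can be exercised without a broker. A minimal sketch using the kafka-streams-test-utils classes (TopologyTestDriver, TestInputTopic, TestOutputTopic; the latter two require Kafka 2.4+); the wordCount instance and the "input"/"output" topic names are assumptions, not part of the example above:

// Sketch only: assumes wordCount was constructed with topics "input" and "output".
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-test");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

try (TopologyTestDriver driver = new TopologyTestDriver(wordCount.getTopology(), props)) {
    TestInputTopic<String, String> in = driver.createInputTopic(
            "input", Serdes.String().serializer(), Serdes.String().serializer());
    TestOutputTopic<String, Long> out = driver.createOutputTopic(
            "output", Serdes.String().deserializer(), Serdes.Long().deserializer());

    in.pipeInput(null, "hello world hello");
    // Every count update is forwarded downstream: [hello=1, world=1, hello=2]
    System.out.println(out.readKeyValuesToList());
}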
Example #11
Source File: KafkaDenormalizer.java From cqrs-eventsourcing-kafka with Apache License 2.0
@Override
public void start() throws Exception {
    Predicate<String, EventEnvelope> inventoryItemCreated =
            (k, v) -> k.equals(InventoryItemCreated.class.getSimpleName());
    Predicate<String, EventEnvelope> inventoryItemRenamed =
            (k, v) -> k.equals(InventoryItemRenamed.class.getSimpleName());
    Predicate<String, EventEnvelope> inventoryItemDeactivated =
            (k, v) -> k.equals(InventoryItemDeactivated.class.getSimpleName());

    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, EventEnvelope>[] filteredStreams = builder
            .stream(INVENTORY_ITEM_TOPIC, Consumed.with(Serdes.String(), initializeEnvelopeSerde()))
            .selectKey((k, v) -> v.eventType)
            .branch(inventoryItemCreated, inventoryItemRenamed, inventoryItemDeactivated);

    filteredStreams[0].process(InventoryItemCreatedHandler::new);
    filteredStreams[1].process(InventoryItemRenamedHandler::new);
    filteredStreams[2].process(InventoryItemDeactivatedHandler::new);

    kafkaStreams = new KafkaStreams(builder.build(), getProperties());
    kafkaStreams.cleanUp(); // -- only because we are using in-memory
    kafkaStreams.start();
}
Example #12
Source File: QueuedSchemaKStream.java From ksql-fork-with-deep-learning-function with Apache License 2.0
private QueuedSchemaKStream(
        final Schema schema,
        final KStream kstream,
        final Field keyField,
        final List<SchemaKStream> sourceSchemaKStreams,
        final Type type,
        final FunctionRegistry functionRegistry,
        final Optional<Integer> limit,
        final OutputNode outputNode,
        final SchemaRegistryClient schemaRegistryClient
) {
    super(
            schema,
            kstream,
            keyField,
            sourceSchemaKStreams,
            type,
            functionRegistry,
            schemaRegistryClient
    );
    setOutputNode(outputNode);
    kstream.foreach(new QueuedSchemaKStream.QueuePopulator(rowQueue, limit));
}
Example #13
Source File: StreamUtils.java From kafka-graphs with Apache License 2.0
public static <K, V> KStream<K, V> streamFromCollection(
        StreamsBuilder builder,
        Properties props,
        String topic,
        int numPartitions,
        short replicationFactor,
        Serde<K> keySerde,
        Serde<V> valueSerde,
        Collection<KeyValue<K, V>> values) {

    ClientUtils.createTopic(topic, numPartitions, replicationFactor, props);
    try (Producer<K, V> producer = new KafkaProducer<>(props, keySerde.serializer(), valueSerde.serializer())) {
        for (KeyValue<K, V> value : values) {
            ProducerRecord<K, V> producerRecord = new ProducerRecord<>(topic, value.key, value.value);
            producer.send(producerRecord);
        }
        producer.flush();
    }
    return builder.stream(topic, Consumed.with(keySerde, valueSerde));
}
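A hypothetical call to this helper might look like the sketch below; the topic name, serdes, and the producerProps variable are illustrative assumptions, not part of the kafka-graphs API:

// Seed a single-partition topic with two records and obtain a KStream over it.
StreamsBuilder builder = new StreamsBuilder();
KStream<String, Long> seeded = StreamUtils.streamFromCollection(
        builder, producerProps,        // producerProps: hypothetical producer configuration
        "seed-topic", 1, (short) 1,    // topic name, partitions, replication factor
        Serdes.String(), Serdes.Long(),
        Arrays.asList(KeyValue.pair("a", 1L), KeyValue.pair("b", 2L)));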
Example #14
Source File: KafkaStreamsStreamListenerSetupMethodOrchestrator.java From spring-cloud-stream-binder-kafka with Apache License 2.0
private KStream<?, ?> getkStream(String inboundName,
        KafkaStreamsStateStoreProperties storeSpec,
        BindingProperties bindingProperties,
        KafkaStreamsConsumerProperties kafkaStreamsConsumerProperties,
        StreamsBuilder streamsBuilder, Serde<?> keySerde, Serde<?> valueSerde,
        Topology.AutoOffsetReset autoOffsetReset, boolean firstBuild) {
    if (storeSpec != null) {
        StoreBuilder storeBuilder = buildStateStore(storeSpec);
        streamsBuilder.addStateStore(storeBuilder);
        if (LOG.isInfoEnabled()) {
            LOG.info("state store " + storeBuilder.name() + " added to topology");
        }
    }
    return getKStream(inboundName, bindingProperties, kafkaStreamsConsumerProperties,
            streamsBuilder, keySerde, valueSerde, autoOffsetReset, firstBuild);
}
Example #15
Source File: KStreamBoundElementFactory.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Override @SuppressWarnings("unchecked") public KStream createOutput(final String name) { BindingProperties bindingProperties = this.bindingServiceProperties.getBindingProperties(name); ProducerProperties producerProperties = bindingProperties.getProducer(); if (producerProperties == null) { producerProperties = this.bindingServiceProperties.getProducerProperties(name); producerProperties.setUseNativeEncoding(true); } else { if (!encodingDecodingBindAdviceHandler.isEncodingSettingProvided()) { producerProperties.setUseNativeEncoding(true); } } return createProxyForKStream(name); }
Example #16
Source File: KafkaStreamsAggregateSample.java From spring-cloud-stream-samples with Apache License 2.0
@Bean
public Consumer<KStream<String, DomainEvent>> aggregate() {
    ObjectMapper mapper = new ObjectMapper();
    Serde<DomainEvent> domainEventSerde = new JsonSerde<>(DomainEvent.class, mapper);

    return input -> input
            .groupBy(
                    (s, domainEvent) -> domainEvent.boardUuid,
                    Grouped.with(null, domainEventSerde)) // null key serde: fall back to the configured default
            .aggregate(
                    String::new,
                    (s, domainEvent, board) -> board.concat(domainEvent.eventType),
                    Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as("test-events-snapshots")
                            .withKeySerde(Serdes.String())
                            .withValueSerde(Serdes.String()));
}
Example #17
Source File: WordCountStream.java From micronaut-kafka with Apache License 2.0
@Singleton
@Named(MY_STREAM)
KStream<String, String> myStream(@Named(MY_STREAM) ConfiguredStreamBuilder builder) {
    // set default serdes
    Properties props = builder.getConfiguration();
    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    KStream<String, String> source = builder.stream(NAMED_WORD_COUNT_INPUT);
    KTable<String, Long> counts = source
            .flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split(" ")))
            .groupBy((key, value) -> value)
            .count();

    // need to override value serde to Long type
    counts.toStream().to(NAMED_WORD_COUNT_OUTPUT, Produced.with(Serdes.String(), Serdes.Long()));
    return source;
}
Example #18
Source File: SummaryBulkAggregation.java From kafka-graphs with Apache License 2.0
@SuppressWarnings("unchecked") @Override public KTable<Windowed<Short>, T> run(final KStream<Edge<K>, EV> edgeStream) { //For parallel window support we key the edge stream by partition and apply a parallel fold per partition. //Finally, we merge all locally combined results into our final graph aggregation property. KTable<Windowed<Short>, S> partialAgg = edgeStream .groupByKey(Grouped.with(new KryoSerde<>(), new KryoSerde<>())) .windowedBy(TimeWindows.of(Duration.ofMillis(timeMillis))) .aggregate(this::initialValue, new PartialAgg<>(updateFun())) .toStream() .groupBy((k, v) -> GLOBAL_KEY) .windowedBy(TimeWindows.of(Duration.ofMillis(timeMillis))) .reduce(combineFun()) .mapValues(aggregator(edgeStream), Materialized.<Windowed<Short>, S, KeyValueStore<Bytes, byte[]>> as(KGraph.generateStoreName()).withKeySerde(new KryoSerde<>()).withValueSerde(new KryoSerde<>())); if (transform() != null) { return partialAgg.mapValues( transform(), Materialized.<Windowed<Short>, T, KeyValueStore<Bytes, byte[]>> as(KGraph.generateStoreName()).withKeySerde(new KryoSerde<>()).withValueSerde(new KryoSerde<>()) ); } return (KTable<Windowed<Short>, T>) partialAgg; }
Example #19
Source File: WordCountProcessorApplicationTests.java From spring-cloud-stream-samples with Apache License 2.0
/**
 * Set up the stream topology: build the input KStream, apply the
 * word-count Function under test, and route its output to the output topic.
 */
@Before
public void setup() {
    final StreamsBuilder builder = new StreamsBuilder();
    KStream<Bytes, String> input = builder.stream(INPUT_TOPIC, Consumed.with(nullSerde, stringSerde));

    KafkaStreamsWordCountApplication.WordCountProcessorApplication app =
            new KafkaStreamsWordCountApplication.WordCountProcessorApplication();
    final Function<KStream<Bytes, String>, KStream<Bytes, KafkaStreamsWordCountApplication.WordCount>> process =
            app.process();
    final KStream<Bytes, KafkaStreamsWordCountApplication.WordCount> output = process.apply(input);
    output.to(OUTPUT_TOPIC, Produced.with(nullSerde, countSerde));

    testDriver = new TopologyTestDriver(builder.build(), getStreamsConfiguration());
}
Example #20
Source File: KStreamStreamListenerResultAdapter.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Override @SuppressWarnings("unchecked") public Closeable adapt(KStream streamListenerResult, KStreamBoundElementFactory.KStreamWrapper boundElement) { boundElement.wrap(streamListenerResult); return new NoOpCloseable(); }
Example #21
Source File: KafkaStreamsWordCountApplication.java From spring-cloud-stream-samples with Apache License 2.0
@Bean
public Function<KStream<Object, String>, KStream<?, WordCount>> process() {
    return input -> input
            .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
            .map((key, value) -> new KeyValue<>(value, value))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .windowedBy(TimeWindows.of(Duration.ofSeconds(60)))
            .count(Materialized.as("WordCounts-1"))
            .toStream()
            .map((key, value) -> new KeyValue<>(null,
                    new WordCount(key.key(), value,
                            new Date(key.window().start()),
                            new Date(key.window().end()))));
}
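No @StreamListener wiring is needed here: under the functional programming model of recent Spring Cloud Stream releases, a Function bean like this is bound by naming convention (process-in-0 / process-out-0), with the actual input and output topics supplied through the binding destinations in the application configuration.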
Example #22
Source File: KafkaStreamsBinderWordCountFunctionTests.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Bean
public Function<KStream<Object, String>, KStream<String, WordCount>> process() {
    return input -> input
            .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
            .map((key, value) -> new KeyValue<>(value, value))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .windowedBy(TimeWindows.of(5000))
            .count(Materialized.as("foo-WordCounts"))
            .toStream()
            .map((key, value) -> new KeyValue<>(key.key(),
                    new WordCount(key.key(), value,
                            new Date(key.window().start()),
                            new Date(key.window().end()))));
}
Example #23
Source File: AbstractKafkaStreamsBinderProcessor.java From spring-cloud-stream-binder-kafka with Apache License 2.0
protected KStream<?, ?> getKStream(String inboundName, BindingProperties bindingProperties,
        KafkaStreamsConsumerProperties kafkaStreamsConsumerProperties,
        StreamsBuilder streamsBuilder, Serde<?> keySerde, Serde<?> valueSerde,
        Topology.AutoOffsetReset autoOffsetReset, boolean firstBuild) {
    if (firstBuild) {
        addStateStoreBeans(streamsBuilder);
    }

    KStream<?, ?> stream;
    if (this.kafkaStreamsExtendedBindingProperties
            .getExtendedConsumerProperties(inboundName).isDestinationIsPattern()) {
        final Pattern pattern = Pattern.compile(
                this.bindingServiceProperties.getBindingDestination(inboundName));
        stream = streamsBuilder.stream(pattern);
    }
    else {
        String[] bindingTargets = StringUtils.commaDelimitedListToStringArray(
                this.bindingServiceProperties.getBindingDestination(inboundName));
        final Consumed<?, ?> consumed = getConsumed(kafkaStreamsConsumerProperties,
                keySerde, valueSerde, autoOffsetReset);
        stream = streamsBuilder.stream(Arrays.asList(bindingTargets), consumed);
    }

    final boolean nativeDecoding = this.bindingServiceProperties
            .getConsumerProperties(inboundName).isUseNativeDecoding();
    if (nativeDecoding) {
        LOG.info("Native decoding is enabled for " + inboundName
                + ". Inbound deserialization done at the broker.");
    }
    else {
        LOG.info("Native decoding is disabled for " + inboundName
                + ". Inbound message conversion done by Spring Cloud Stream.");
    }

    return getkStream(bindingProperties, stream, nativeDecoding);
}
Example #24
Source File: ErrorImporter.java From SkaETL with Apache License 2.0
public void activate() {
    log.info("Activating error importer");
    StreamsBuilder builder = new StreamsBuilder();
    final Serde<ErrorData> errorDataSerde = Serdes.serdeFrom(
            new GenericSerializer<>(), new GenericDeserializer<>(ErrorData.class));

    KStream<String, ErrorData> streamToES = builder.stream(
            kafkaConfiguration.getErrorTopic(),
            Consumed.with(Serdes.String(), errorDataSerde));
    streamToES.process(() -> elasticsearchProcessor);

    errorStream = new KafkaStreams(builder.build(),
            KafkaUtils.createKStreamProperties(INPUT_PROCESS_ERROR,
                    kafkaConfiguration.getBootstrapServers()));
    Runtime.getRuntime().addShutdownHook(new Thread(errorStream::close));
    errorStream.start();
}
Example #25
Source File: KafkaStreamsStreamListenerSetupMethodOrchestrator.java From spring-cloud-stream-binder-kafka with Apache License 2.0
private boolean methodReturnTypeSuppports(Method method) {
    Class<?> returnType = method.getReturnType();
    return returnType.equals(KStream.class)
            || (returnType.isArray() && returnType.getComponentType().equals(KStream.class));
}
Example #26
Source File: StreamToTableJoinFunctionTests.java From spring-cloud-stream-binder-kafka with Apache License 2.0
@Bean
public BiConsumer<KStream<String, Long>, KTable<String, String>> process() {
    return (userClicksStream, userRegionsTable) -> {
        userClicksStream.foreach((key, value) -> latch.countDown());
        userRegionsTable.toStream().foreach((key, value) -> latch.countDown());
    };
}
Example #27
Source File: KafkaStreamsLiveTest.java From tutorials with MIT License
@Test @Ignore("it needs to have kafka broker running on local") public void shouldTestKafkaStreams() throws InterruptedException { // given String inputTopic = "inputTopic"; Properties streamsConfiguration = new Properties(); streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-live-test"); streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); // Use a temporary directory for storing state, which will be automatically removed after the test. streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath()); // when StreamsBuilder builder = new StreamsBuilder(); KStream<String, String> textLines = builder.stream(inputTopic); Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); KTable<String, Long> wordCounts = textLines.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))).groupBy((key, word) -> word).count(); textLines.foreach((word, count) -> System.out.println("word: " + word + " -> " + count)); String outputTopic = "outputTopic"; final Serde<String> stringSerde = Serdes.String(); final Serde<String> longSerde = Serdes.String(); textLines.to(outputTopic, Produced.with(stringSerde,longSerde)); KafkaStreams streams = new KafkaStreams(new Topology(), streamsConfiguration); streams.start(); // then Thread.sleep(30000); streams.close(); }
Example #28
Source File: TwitterStreamsAnalyzer.java From kafka-streams with Apache License 2.0
public void run() {
    StreamsConfig streamsConfig = new StreamsConfig(getProperties());

    JsonSerializer<Tweet> tweetJsonSerializer = new JsonSerializer<>();
    JsonDeserializer<Tweet> tweetJsonDeserializer = new JsonDeserializer<>(Tweet.class);
    Serde<Tweet> tweetSerde = Serdes.serdeFrom(tweetJsonSerializer, tweetJsonDeserializer);

    KStreamBuilder kStreamBuilder = new KStreamBuilder();

    Classifier classifier = new Classifier();
    classifier.train(new File("src/main/resources/kafkaStreamsTwitterTrainingData_clean.csv"));

    KeyValueMapper<String, Tweet, String> languageToKey = (k, v) ->
            StringUtils.isNotBlank(v.getText()) ? classifier.classify(v.getText()) : "unknown";

    Predicate<String, Tweet> isEnglish = (k, v) -> k.equals("english");
    Predicate<String, Tweet> isFrench = (k, v) -> k.equals("french");
    Predicate<String, Tweet> isSpanish = (k, v) -> k.equals("spanish");

    KStream<String, Tweet> tweetKStream = kStreamBuilder.stream(Serdes.String(), tweetSerde, "twitterData");
    KStream<String, Tweet>[] filteredStreams = tweetKStream
            .selectKey(languageToKey)
            .branch(isEnglish, isFrench, isSpanish);

    filteredStreams[0].to(Serdes.String(), tweetSerde, "english");
    filteredStreams[1].to(Serdes.String(), tweetSerde, "french");
    filteredStreams[2].to(Serdes.String(), tweetSerde, "spanish");

    kafkaStreams = new KafkaStreams(kStreamBuilder, streamsConfig);
    System.out.println("Starting twitter analysis streams");
    kafkaStreams.start();
    System.out.println("Started");
}
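Note: this example uses the pre-1.0 Kafka Streams API, in which topologies are built with KStreamBuilder and serdes are passed directly to stream(...) and to(...). On Kafka 1.0 and later, the equivalents are StreamsBuilder together with Consumed.with(...) and Produced.with(...), as seen in the other examples on this page.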
Example #29
Source File: EdgeStream.java From kafka-graphs with Apache License 2.0
/**
 * Get the out-degree stream
 *
 * @return a stream of vertices, with the out-degree as the vertex value
 */
@Override
public KStream<K, Long> outDegrees() {
    return aggregate(
            new DegreeTypeSeparator<K, EV>(false, true),
            new DegreeMapFunction<K>()
    );
}
Example #30
Source File: OptimizationStream.java From micronaut-kafka with Apache License 2.0
@Singleton
@Named(STREAM_OPTIMIZATION_OFF)
KStream<String, String> optimizationOff(
        @Named(STREAM_OPTIMIZATION_OFF) ConfiguredStreamBuilder builder) {
    // set default serdes
    Properties props = builder.getConfiguration();
    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    KTable<String, String> table = builder
            .table(OPTIMIZATION_OFF_INPUT, Materialized.as(OPTIMIZATION_OFF_STORE));
    return table.toStream();
}