org.apache.kafka.streams.kstream.KStreamBuilder#stream

Source File: KafkaStreamWordCount.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License

6 votes

/**
 * Word-count demo: reads text lines from the "input" topic, splits each line
 * into lower-cased words, counts the occurrences of every word and publishes
 * the running totals to the "wordCount" topic.
 *
 * @param args not used
 */
public static void main(String[] args) throws Exception {
    final Serde<String> textSerde = Serdes.String();
    final Serde<Long> countSerde = Serdes.Long();

    // Connection settings and default serdes for the Streams runtime.
    final Properties settings = new Properties();
    settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-stream-wordCount");
    settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    settings.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    settings.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    settings.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<String, String> lines = topology.stream(textSerde, textSerde, "input");

    // One record per word, then re-key every record by the word itself so
    // that countByKey groups by word.
    final KStream<String, String> words =
            lines.flatMapValues(line -> Arrays.asList(line.toLowerCase().split("\\W+")));
    final KStream<String, String> keyedByWord =
            words.map((ignoredKey, word) -> new KeyValue<>(word, word));

    keyedByWord.countByKey("Count")
            .toStream()
            .to(textSerde, countSerde, "wordCount");

    final KafkaStreams runner = new KafkaStreams(topology, settings);
    runner.start();

    // Close the streams instance when the JVM shuts down.
    Runtime.getRuntime().addShutdownHook(new Thread(runner::close));
}

Source File: IPFraudKafkaStreamApp.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License

6 votes

/**
 * Builds and starts the IP fraud detection topology: records read from the
 * topic configured under "topic" are kept only when {@code isFraud} accepts
 * their value, and the remaining records are written to the topic configured
 * under "output_topic".
 *
 * @param args not used
 */
public static void main(String[] args) throws Exception {
    final Serde<String> textSerde = Serdes.String();

    // Connection settings and default serdes for the Streams runtime.
    final Properties settings = new Properties();
    settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "IP-Fraud-Detection");
    settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    settings.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    settings.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    settings.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<String, String> incoming =
            topology.stream(textSerde, textSerde, propertyReader.getPropertyValue("topic"));

    // Keep only the records flagged by isFraud and forward them using the
    // default (configured) serdes.
    incoming.filter((ignoredKey, record) -> isFraud(record))
            .to(propertyReader.getPropertyValue("output_topic"));

    final KafkaStreams runner = new KafkaStreams(topology, settings);
    runner.start();

    // Close the streams instance when the JVM shuts down.
    Runtime.getRuntime().addShutdownHook(new Thread(runner::close));
}

Source File: KafkaStreamingMain.java From kafka-streams-api-websockets with Apache License 2.0

5 votes

/**
 * Aggregates the records of the "data-in" topic per key over hopping windows
 * ({@code TimeWindows.of(60000).advanceBy(10000)}) into
 * {@code KafkaStreamingStatistics} objects and writes, for every window, the
 * string form of {@code computeAvgTime()} to the "data-out" topic.
 *
 * @param args not used
 */
public static void main(String[] args) throws Exception {
    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<String, String> input = topology.stream("data-in");

    final KStream<String, String> averages = input
            .groupByKey()
            .aggregate(
                    KafkaStreamingStatistics::new,
                    (ignoredKey, sample, running) -> running.add(sample),
                    TimeWindows.of(60000).advanceBy(10000),
                    Serdes.serdeFrom(new MySerde(), new MySerde()),
                    "data-store")
            // Flatten the windowed key into "<key> <window start>".
            .toStream((windowedKey, ignoredValue) ->
                    windowedKey.key().toString() + " " + windowedKey.window().start())
            .mapValues(statistics -> statistics.computeAvgTime().toString());

    averages.to(Serdes.String(), Serdes.String(), "data-out");

    final Properties settings = new Properties();
    settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "streaming-example");
    settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    settings.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1500);
    // Optional: to also get data produced before the process started, set
    // ConsumerConfig.AUTO_OFFSET_RESET_CONFIG to "earliest" and
    // StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG to 0.

    final KafkaStreams runner = new KafkaStreams(topology, settings);

    // Wipe local state before starting.
    runner.cleanUp();
    runner.start();

    // Close the streams instance when the JVM shuts down.
    Runtime.getRuntime().addShutdownHook(new Thread(runner::close));
}

Source File: KafkaStreamsLiveTest.java From tutorials with MIT License

5 votes

/**
 * Live word-count run against a local broker: counts the words arriving on
 * "inputTopic", prints every count update, mirrors the counts to
 * "outputTopic", lets the topology run for 30 seconds and then closes it.
 * The test makes no assertions.
 */
@Test
@Ignore("it needs to have kafka broker running on local")
public void shouldTestKafkaStreams() throws InterruptedException {
    // given
    final String sourceTopic = "inputTopic";
    final String sinkTopic = "outputTopic";
    final String defaultSerdeClass = Serdes.String().getClass().getName();

    final Properties config = new Properties();
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-live-test");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, defaultSerdeClass);
    config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, defaultSerdeClass);
    config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
    config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // State is kept in a temporary directory supplied by TestUtils.
    config.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());

    // when
    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<String, String> lines = topology.stream(sourceTopic);
    final Pattern nonWord = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);

    final KTable<String, Long> totals = lines
            .flatMapValues(line -> Arrays.asList(nonWord.split(line.toLowerCase())))
            .groupBy((ignoredKey, word) -> word)
            .count();

    totals.foreach((word, total) -> System.out.println("word: " + word + " -> " + total));
    totals.to(Serdes.String(), Serdes.Long(), sinkTopic);

    final KafkaStreams runner = new KafkaStreams(topology, config);
    runner.start();

    // then
    Thread.sleep(30000);
    runner.close();
}

Source File: ExclamationKafkaStream.java From kafka-streams-ex with MIT License

5 votes

/** Connects the topic "console" with the topic "exclamated", appending two
   *  exclamation points to every input value.
   *
   * @param args Not used.
   */
  public static void main(String[] args) {

      final KStreamBuilder topology = new KStreamBuilder();

      // Source stream; keys and values are read with the configured default serdes.
      final KStream<byte[], String> console = topology.stream("console");

      // Two chained steps, each appending a single "!".
      final KStream<byte[], String> once = console.mapValues(value -> value + "!");
      final KStream<byte[], String> twice = once.mapValues(value -> value + "!");

      // Sink. Uses the configured serializers.
      twice.to("exclamated");

      // Cluster settings; everything is assumed to run locally.
      final Properties settings = new Properties();
      settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "exclamation-kafka-streams");
      settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      settings.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");

      // Default serdes: byte-array keys, string values.
      settings.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
      settings.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

      // Build and run.
      final KafkaStreams runner = new KafkaStreams(topology, settings);
      runner.start();
  }

Source File: StocksKafkaStreamsDriver.java From kafka-streams with Apache License 2.0

5 votes

/**
 * Reads stock transactions from "stocks", re-keys each one by
 * {@code getSymbol()}, passes them through "stocks-out", folds them per key
 * into {@code StockTransactionCollector} objects over
 * {@code TimeWindows.of(10000)} windows and writes the windowed summaries to
 * "transaction-summary".
 *
 * @param args not used
 */
public static void main(String[] args) {

    final StreamsConfig config = new StreamsConfig(getProperties());

    // The string (de)serializer pair backs both the plain and the windowed key serde.
    final StringSerializer keySerializer = new StringSerializer();
    final StringDeserializer keyDeserializer = new StringDeserializer();
    final Serde<String> keySerde = Serdes.serdeFrom(keySerializer, keyDeserializer);
    final Serde<Windowed<String>> windowedKeySerde = Serdes.serdeFrom(
            new WindowedSerializer<String>(keySerializer),
            new WindowedDeserializer<String>(keyDeserializer));

    // JSON serdes for the individual transaction and for the per-window summary.
    final Serde<StockTransaction> txnSerde = Serdes.serdeFrom(
            new JsonSerializer<StockTransaction>(),
            new JsonDeserializer<StockTransaction>(StockTransaction.class));
    final Serde<StockTransactionCollector> summarySerde = Serdes.serdeFrom(
            new JsonSerializer<StockTransactionCollector>(),
            new JsonDeserializer<StockTransactionCollector>(StockTransactionCollector.class));

    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<String, StockTransaction> trades = topology.stream(keySerde, txnSerde, "stocks");

    trades.map((ignoredKey, trade) -> new KeyValue<>(trade.getSymbol(), trade))
            .through(keySerde, txnSerde, "stocks-out")
            .groupBy((symbol, trade) -> symbol, keySerde, txnSerde)
            .aggregate(
                    StockTransactionCollector::new,
                    (symbol, trade, summary) -> summary.add(trade),
                    TimeWindows.of(10000),
                    summarySerde,
                    "stock-summaries")
            .to(windowedKeySerde, summarySerde, "transaction-summary");

    System.out.println("Starting StockStreams Example");
    final KafkaStreams runner = new KafkaStreams(topology, config);
    runner.start();
    System.out.println("Now started StockStreams Example");
}

Source File: TwitterStreamsAnalyzer.java From kafka-streams with Apache License 2.0

5 votes

/**
 * Routes tweets from the "twitterData" topic to the "english", "french" and
 * "spanish" topics. Each tweet is re-keyed with the classifier's result for
 * its text ("unknown" when the text is blank) and then branched on that key;
 * tweets matching none of the three keys are not forwarded. The started
 * streams instance is stored in the {@code kafkaStreams} field.
 */
public void run()  {
    final StreamsConfig config = new StreamsConfig(getProperties());

    final Serde<Tweet> tweetSerde = Serdes.serdeFrom(
            new JsonSerializer<Tweet>(), new JsonDeserializer<Tweet>(Tweet.class));

    final KStreamBuilder topology = new KStreamBuilder();

    // Train the classifier from the bundled CSV before wiring the topology.
    final Classifier languageClassifier = new Classifier();
    languageClassifier.train(new File("src/main/resources/kafkaStreamsTwitterTrainingData_clean.csv"));

    final KeyValueMapper<String, Tweet, String> detectLanguage = (ignoredKey, tweet) -> {
        if (StringUtils.isNotBlank(tweet.getText())) {
            return languageClassifier.classify(tweet.getText());
        }
        return "unknown";
    };

    final Predicate<String, Tweet> english = (language, tweet) -> language.equals("english");
    final Predicate<String, Tweet> french = (language, tweet) -> language.equals("french");
    final Predicate<String, Tweet> spanish = (language, tweet) -> language.equals("spanish");

    final KStream<String, Tweet> tweets = topology.stream(Serdes.String(), tweetSerde, "twitterData");
    final KStream<String, Tweet>[] byLanguage =
            tweets.selectKey(detectLanguage).branch(english, french, spanish);

    // Branch i corresponds to predicate i, hence to output topic i.
    final String[] outputTopics = {"english", "french", "spanish"};
    for (int i = 0; i < outputTopics.length; i++) {
        byLanguage[i].to(Serdes.String(), tweetSerde, outputTopics[i]);
    }

    kafkaStreams = new KafkaStreams(topology, config);
    System.out.println("Starting twitter analysis streams");
    kafkaStreams.start();
    System.out.println("Started");
}

Source File: CommandProcessor.java From cqrs-manager-for-distributed-reactive-services with Apache License 2.0

5 votes

/**
 * Wires and starts the command-processing topology:
 * <ol>
 *   <li>commands with action "create-customer" are read from {@code commandsTopic};</li>
 *   <li>each is turned into a "customer-created" event (new random key, the
 *       command id stored under "parent", a fresh id written into its "data"
 *       map) and sent through {@code eventsTopic};</li>
 *   <li>the event's "data" map, keyed by that id, is sent to
 *       {@code customersTopic} and handed to the {@code customerStore}
 *       processor backed by the persistent "Customers" state store.</li>
 * </ol>
 * The started instance is kept in {@code this.kafkaStreams}.
 */
public void start() {
    final KStreamBuilder topology = new KStreamBuilder();

    final Serde<UUID> idSerde = new FressianSerde();
    final Serde<Map> payloadSerde = new FressianSerde();

    final KStream<UUID, Map> incomingCommands = topology.stream(idSerde, payloadSerde, commandsTopic);

    final KStream<UUID, Map> createCommands = incomingCommands.filter((commandId, command) ->
            command.get(new Keyword("action")).equals(new Keyword("create-customer")));

    final KStream<UUID, Map> customerEvents = createCommands
            .map((commandId, command) -> {
                logger.debug("Command received");
                // NOTE(review): the copy is shallow, so the "data" map below is the
                // same instance the incoming command holds.
                Map event = new HashMap(command);
                event.put(new Keyword("action"), new Keyword("customer-created"));
                event.put(new Keyword("parent"), commandId);
                Map data = (Map) event.get(new Keyword("data"));
                data.put(new Keyword("id"), UUID.randomUUID());
                return new KeyValue<>(UUID.randomUUID(), event);
            })
            .through(idSerde, payloadSerde, eventsTopic);

    // Re-key by the customer id and keep only the customer payload.
    final KStream<UUID, Map> customers = customerEvents.map((eventId, event) -> {
        Map customer = (Map) event.get(new Keyword("data"));
        UUID customerId = (UUID) customer.get(new Keyword("id"));
        return new KeyValue<UUID, Map>(customerId, customer);
    });

    customers.through(idSerde, payloadSerde, customersTopic);

    // Register the persistent store before attaching the processor that names it.
    final StateStoreSupplier customersStore = Stores.create("Customers")
            .withKeys(idSerde)
            .withValues(payloadSerde)
            .persistent()
            .build();
    topology.addStateStore(customersStore);

    customers.process(customerStore, "Customers");

    this.kafkaStreams = new KafkaStreams(topology, kafkaStreamsConfig);
    this.kafkaStreams.start();
}

Source File: WikipediaStreamDemo.java From hello-kafka-streams with Apache License 2.0

5 votes

/**
 * Builds (without starting) the Kafka Streams instance for the Wikipedia
 * demo: raw JSON from "wikipedia-raw" is parsed into
 * {@code WikipediaMessage}s, passed through "wikipedia-parsed", and the
 * messages of type EDIT are counted per key; every count update is printed
 * to standard output.
 *
 * @param bootstrapServers value stored under {@code StreamsConfig.BOOTSTRAP_SERVERS_CONFIG}
 * @return the configured, not-yet-started streams instance
 */
private static KafkaStreams createWikipediaStreamsInstance(String bootstrapServers) {
    final Properties settings = new Properties();
    settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "wikipedia-streams");
    settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);

    final Serializer<JsonNode> nodeSerializer = new JsonSerializer();
    final Deserializer<JsonNode> nodeDeserializer = new JsonDeserializer();
    final Serde<JsonNode> nodeSerde = Serdes.serdeFrom(nodeSerializer, nodeDeserializer);

    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<JsonNode, JsonNode> rawMessages = topology.stream(nodeSerde, nodeSerde, "wikipedia-raw");

    final KStream<String, WikipediaMessage> parsedMessages =
            rawMessages.map(WikipediaMessage::parceIRC)
                    .filter(WikipediaMessage::filterNonNull)
                    .through(Serdes.String(), new JsonPOJOSerde<>(WikipediaMessage.class), "wikipedia-parsed");

    final KStream<String, WikipediaMessage> edits =
            parsedMessages.filter((user, message) -> message.type == WikipediaMessage.Type.EDIT);
    final KTable<String, Long> editsPerUser =
            edits.countByKey(Serdes.String(), "wikipedia-edits-by-user");

    // Print every count update.
    editsPerUser.toStream().process(() -> new AbstractProcessor<String, Long>() {
        @Override
        public void process(String user, Long numEdits) {
            System.out.println("USER: " + user + " num.edits: " + numEdits);
        }
    });

    return new KafkaStreams(topology, settings);
}

Source File: Stream.java From hdinsight-kafka-java-get-started with MIT License

5 votes

/**
 * Word-count example: reads sentences from the "test" topic, counts the
 * occurrences of every lower-cased word and writes the running totals to the
 * "wordcounts" topic.
 *
 * @param args {@code args[0]} is the broker list used as bootstrap servers
 */
public static void main( String[] args ) {
    final Properties settings = new Properties();
    // The name must be unique on the Kafka cluster
    settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-example");
    // Brokers
    settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, args[0]);
    // StreamsConfig.ZOOKEEPER_CONNECT_CONFIG is intentionally left unset here.
    // Default SerDes for keys and values
    settings.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    settings.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

    // Serdes for the word and its count
    final Serde<String> wordSerde = Serdes.String();
    final Serde<Long> countSerde = Serdes.Long();

    final KStreamBuilder topology = new KStreamBuilder();
    final KStream<String, String> lines = topology.stream(wordSerde, wordSerde, "test");

    // One record per word, re-keyed by the word so countByKey groups by word.
    final KStream<String, String> words =
            lines.flatMapValues(line -> Arrays.asList(line.toLowerCase().split("\\W+")));
    final KStream<String, String> keyedByWord =
            words.map((ignoredKey, word) -> new KeyValue<>(word, word));

    keyedByWord.countByKey("Counts")
            .toStream()
            .to(wordSerde, countSerde, "wordcounts");

    final KafkaStreams runner = new KafkaStreams(topology, settings);
    runner.start();

    // Close the streams instance when the JVM shuts down.
    Runtime.getRuntime().addShutdownHook(new Thread(runner::close));
}

Source File: RegexTest.java From kafka-streams with Apache License 2.0

4 votes

/**
 * Subscribes to two topic patterns ("topic-\d" and "topic-[A-Y]+") and to
 * the named topic "topic-Z", and prints every record of each stream under
 * its own label.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final StreamsConfig config = new StreamsConfig(getProperties());
    final KStreamBuilder topology = new KStreamBuilder();

    final KStream<String, String> digitTopics =
            topology.stream(Serdes.String(), Serdes.String(), Pattern.compile("topic-\\d"));
    final KStream<String, String> topicZ =
            topology.stream(Serdes.String(), Serdes.String(), "topic-Z");
    final KStream<String, String> letterTopics =
            topology.stream(Serdes.String(), Serdes.String(), Pattern.compile("topic-[A-Y]+"));

    // The argument to print() is the label passed to each stream's printer.
    digitTopics.print("pattern-\\d");
    topicZ.print("topic-Z");
    letterTopics.print("topic-[A-Y]+");

    System.out.println("Starting stream regex consumer Example");
    final KafkaStreams runner = new KafkaStreams(topology, config);
    runner.start();
}

Source File: NotLookingAtFacebook.java From kafka-streams-ex with MIT License

4 votes

/** Runs the streams program, writing to the "notifications" and
 *  "metrics" topics.
 *
 *  <p>Every record on "ticks" is left-joined with the "logons" table. A
 *  notification is emitted for each tick whose joined logon value is present
 *  and is not "LOGON". Notifications are additionally counted per key in
 *  hopping windows and the counts are written to "metrics".
 *
 * @param args Not used.
 * @throws Exception declared by the original signature; kept for callers.
 */
public static void main(String[] args) throws Exception {

    Properties config = new Properties();

    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "not-looking-at-facebook");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");

    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());

    // Configuration for the producers that generate the demo input.
    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer",
        "org.apache.kafka.common.serialization.StringSerializer");
    producerConfig.put("value.serializer",
        "org.apache.kafka.common.serialization.StringSerializer");

    String[] users = {"Doyin", "George", "Mark"};

    // Build the topology.
    KStreamBuilder builder = new KStreamBuilder();

    KTable<String, String> logons = builder.table("logons");
    KStream<String, String> ticks = builder.stream("ticks");

    KStream<String, String> notifications =
        ticks.leftJoin(logons, (nv, lv) -> new String[] {nv, lv})
             // Filter out any nulls (ticks without a joined logon value).
             .filter((k,v) -> v[1] != null)
             // Filter out anyone who's logged on. The comparison must be by
             // content: joined values are deserialized strings, so an
             // identity check ("!=") against the literal never matches and
             // would let every record through.
             .filter((k,v) -> !"LOGON".equals(v[1]))
             // Now set the message.
             .mapValues(v -> "You are not currently viewing Facebook.");

    // Implement the metrics.
    KTable<Windowed<String>, Long> notificationCounts =
        notifications.countByKey(
            // Create a one minute window.
            TimeWindows.of("notificationCounts", 60000L)
                        // Hop by ten seconds.
                       .advanceBy(10000L)
                       // Don't hang on to old values.
                       .until(60000L));

    // Convert notificationCounts to a stream, extract the key (ignore
    // the embedded time information), and sink to the "metrics" topic.
    notificationCounts.toStream((k,v) -> k.key())
                      .to(Serdes.String(),
                          Serdes.Long(),
                          "metrics");

    // Nuisance delivered. You're welcome.
    notifications.to("notifications");

    // Start producing logon messages.
    new Thread(new LogonGenerator(users, producerConfig)).start();

    // Start producing notifications.
    new Thread(new TickGenerator(users, producerConfig)).start();

    // Start the streams.
    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();

}

Source File: HoppingWindowKafkaStream.java From kafka-streams-ex with MIT License

4 votes

/** Runs the streams program, writing to the "long-counts-all" topic.
 *
 *  <p>Counts the records of the "longs" topic per key in hopping windows and
 *  then feeds that topic itself with a pseudo-random long every 500 ms until
 *  the thread is interrupted or sending fails. On the way out the producer
 *  and the streams instance are closed.
 *
 * @param args Not used.
 * @throws Exception if the generator loop is interrupted or sending fails.
 */
public static void main(String[] args) throws Exception{

    Properties config = new Properties();

    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "hopping-window-kafka-streams");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.ByteArray().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());

    KStreamBuilder builder = new KStreamBuilder();

    KStream<byte[], Long> longs = builder.stream(
        Serdes.ByteArray(), Serdes.Long(), "longs");

    // The hopping windows will count the last second, two seconds,
    // three seconds, etc until the last ten seconds of data are in the
    // windows.
    KTable<Windowed<byte[]>, Long> longCounts =
        longs.groupByKey()
              .count(TimeWindows.of(10000L)
                                .advanceBy(1000L)
                                .until(10000L),
                     "long-counts");

    // Write to output topic, keyed by the plain key (window info dropped).
    // The map step is an identity mapping kept from the original topology.
    longCounts.toStream((k,v) -> k.key())
              .map((k,v) -> KeyValue.pair(k, v))
              .to(Serdes.ByteArray(),
                  Serdes.Long(),
                  "long-counts-all");

    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();

    // Now generate the data and write to the topic.
    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.ByteArraySerializer");
    producerConfig.put("value.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.LongSerializer");

    Random rng = new Random(12345L);

    // Explicit charset: the key bytes must not depend on the platform default.
    byte[] key = "A".getBytes(java.nio.charset.StandardCharsets.UTF_8);

    // try-with-resources/finally: release the producer and the streams
    // instance when the loop is left through an exception.
    try (KafkaProducer<byte[], Long> producer =
             new KafkaProducer<byte[], Long>(producerConfig)) {
        while(true) {
            producer.send(new ProducerRecord<byte[], Long>(
                "longs", key, rng.nextLong()%10));
            Thread.sleep(500L);
        } // Close infinite loop generating data.
    } finally {
        streams.close();
    }
}

Source File: ExclamationAdvancedKafkaStream.java From kafka-streams-ex with MIT License

4 votes

/** Reads the "console" topic, appends the result of
 *  {@code getExclamations()} to every value twice (2-4 exclamation points in
 *  total, per the original description), writes all messages to the
 *  "exclamated" topic and those ending in four exclamation points to the
 *  "much-exclamated" topic as well.
 *
 * @param args Not used.
 */
public static void main(String[] args) {

    final KStreamBuilder topology = new KStreamBuilder();

    // Source stream, read with the configured default serdes.
    final KStream<byte[], String> console = topology.stream("console");

    // Two chained steps, each appending one getExclamations() result.
    final KStream<byte[], String> firstPass =
        console.mapValues(value -> value + getExclamations());
    final KStream<byte[], String> exclaimed =
        firstPass.mapValues(value -> value + getExclamations());

    final KStream<byte[], String> muchExclaimed =
        exclaimed.filter((ignoredKey, value) -> value.endsWith("!!!!"));

    // Sink them both.
    exclaimed.to("exclamated");
    muchExclaimed.to("much-exclamated");

    // Cluster settings; everything is assumed to run locally.
    final Properties settings = new Properties();
    settings.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "exclamation-advanced-kafka-streams");
    settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    settings.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");

    // Default serdes: byte-array keys, string values.
    settings.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.ByteArray().getClass().getName());
    settings.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());

    // Build and run.
    final KafkaStreams runner = new KafkaStreams(topology, settings);
    runner.start();
}

Source File: TumblingWindowKafkaStream.java From kafka-streams-ex with MIT License

4 votes

/** Runs the streams program, writing to the "long-counts-all" topic.
 *
 *  <p>Counts the records of the "longs" topic per key in tumbling windows
 *  and then feeds that topic itself with a pseudo-random long every 500 ms
 *  until the thread is interrupted or sending fails. On the way out the
 *  producer and the streams instance are closed.
 *
 * @param args Not used.
 * @throws Exception if the generator loop is interrupted or sending fails.
 */
public static void main(String[] args) throws Exception {

    Properties config = new Properties();

    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "tumbling-window-kafka-streams");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,
        "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG,
        "localhost:2181");
    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.ByteArray().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.Long().getClass().getName());

    KStreamBuilder builder = new KStreamBuilder();

    KStream<byte[], Long> longs = builder.stream(
        Serdes.ByteArray(), Serdes.Long(), "longs");

    // The tumbling windows will clear every ten seconds.
    KTable<Windowed<byte[]>, Long> longCounts =
        longs.groupByKey()
             .count(TimeWindows.of(10000L)
                               .until(10000L),
                    "long-counts");

    // Write to topics, keyed by the plain key (window info dropped).
    longCounts.toStream((k,v) -> k.key())
              .to(Serdes.ByteArray(),
                  Serdes.Long(),
                  "long-counts-all");

    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();

    // Now generate the data and write to the topic.
    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.ByteArraySerializer");
    producerConfig.put("value.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.LongSerializer");

    Random rng = new Random(12345L);

    // Explicit charset: the key bytes must not depend on the platform default.
    byte[] key = "A".getBytes(java.nio.charset.StandardCharsets.UTF_8);

    // try-with-resources/finally: release the producer and the streams
    // instance when the loop is left through an exception.
    try (KafkaProducer<byte[], Long> producer =
             new KafkaProducer<byte[], Long>(producerConfig)) {
        while(true) {
            producer.send(new ProducerRecord<byte[], Long>(
                "longs", key, rng.nextLong()%10));
            Thread.sleep(500L);
        } // Close infinite data generating loop.
    } finally {
        streams.close();
    }
}

Source File: WordCountExample.java From kafka-streams-wordcount with Apache License 2.0

4 votes

/**
 * Word-count demo: reads lines from "wordcount-input", counts every
 * lower-cased word except "the", and writes the counts (as strings) to
 * "wordcount-output". The application runs for five seconds and then closes
 * the streams instance, even if the wait is interrupted.
 *
 * @param args not used
 * @throws Exception if the wait is interrupted
 */
public static void main(String[] args) throws Exception{

        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        // Note: To re-run the demo, you need to use the offset reset tool:
        // https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        // work-around for an issue around timing of creating internal topics
        // Fixed in Kafka 0.10.2.0
        // don't use in large production apps - this increases network load
        // props.put(CommonClientConfigs.METADATA_MAX_AGE_CONFIG, 500);

        KStreamBuilder builder = new KStreamBuilder();

        KStream<String, String> source = builder.stream("wordcount-input");

        final Pattern pattern = Pattern.compile("\\W+");

        // Fully typed pipeline (no raw KStream / KeyValue<Object, Object>), so the
        // compiler checks that keys and values stay strings up to the sink, which
        // relies on the default String serdes.
        KStream<String, String> counts = source
                .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
                .map((key, value) -> new KeyValue<String, String>(value, value))
                .filter((key, value) -> !value.equals("the"))
                .groupByKey()
                .count("CountStore")
                .mapValues(value -> Long.toString(value))
                .toStream();
        counts.to("wordcount-output");

        KafkaStreams streams = new KafkaStreams(builder, props);

        // This is for reset to work. Don't use in production - it causes the app to re-load the state from Kafka on every start
        streams.cleanUp();

        streams.start();

        // usually the stream application would be running forever,
        // in this example we just let it run for some time and stop since the input data is finite.
        try {
            Thread.sleep(5000L);
        } finally {
            // Always release the stream threads, also when the sleep is interrupted.
            streams.close();
        }

    }

Source File: StatKStreamBuilderSupplier.java From DBus with Apache License 2.0

4 votes

/**
 * Builds the heartbeat statistics topology.
 *
 * <p>Records from the {@code sources} topics whose value starts with
 * "data_increment_heartbeat" are re-keyed with an {@code HBKey}, restricted
 * to keys with {@code isNormalFormat} set, and duplicated into a "stat" and
 * a "monitor" branch. The "stat" branch is transformed by
 * {@code StatTransformer} and written to {@code sink}. The "monitor" branch
 * is converted into {@code PacketVo} checkpoints keyed by a "/"-joined node
 * path, repartitioned through "monitor-repartition", reduced to the packet
 * with the latest time per key and window, and handed to
 * {@code MonitorProcessorSupplier}.
 *
 * @return the builder holding the assembled topology
 */
@Override
public KStreamBuilder get() {
    KStreamBuilder builder = new KStreamBuilder();
    // toArray(new String[0]) yields a real String[]; casting the Object[]
    // returned by the no-arg toArray() fails with ClassCastException for
    // ordinary collections.
    KStream<String, String> stream = builder.stream(sources.toArray(new String[0]));

    KStream<HBKeySupplier.HBKey, String>[] streams =
            stream.filter((k, v) -> StringUtils.startsWith(v, "data_increment_heartbeat"))
                    .selectKey((k, v) -> new HBKeySupplier(k).get())
                    .filter((k, v) -> k.isNormalFormat)
                    // Emit every heartbeat twice so it can be routed to both branches.
                    .flatMapValues(v -> Arrays.asList("stat", "monitor"))
                    .branch((k, v) -> StringUtils.equalsIgnoreCase("stat", v),
                            (k, v) -> StringUtils.equalsIgnoreCase("monitor", v));

    streams[0].transform(StatTransformer::new).to(sink);
    KStream<String, PacketVo> monitor =
            // NOTE(review): double negative - this keeps only records whose status
            // equals "abort" (ignoring case); confirm that is the intent.
            streams[1].filterNot((k, v) -> !StringUtils.equalsIgnoreCase("abort", k.getStatus()))
                    .map((k, v) -> {
                        // Node path: <monitor path>/<ds>/<schema>/<table>/<partition>;
                        // the schema is upper-cased for DB2 data sources.
                        StringJoiner joiner = new StringJoiner("/");
                        joiner.add(HeartBeatConfigContainer.getInstance().getHbConf().getMonitorPath())
                                .add(k.getDs())
                                .add(StringUtils.equalsIgnoreCase(DbusDatasourceType.DB2.name(), k.getDbType()) ? StringUtils.upperCase(k.getSchema()) : k.getSchema())
                                .add(k.getTable()).add(String.valueOf(k.getPartition()));

                        String node = joiner.toString();
                        PacketVo packet = new PacketVo();
                        packet.setNode(node);
                        packet.setType("checkpoint");
                        packet.setTime(k.getCheckpointMs());
                        packet.setTxTime(k.getTxTimeMs());
                        return new KeyValue<String, PacketVo>(node, packet);
                    });

    // Shuffle (repartition) first so that records with the same key end up
    // in the same partition before they are reduced.
    monitor.through("monitor-repartition")
            .reduceByKey((agg, v) -> v.getTime() > agg.getTime() ? v : agg, TimeWindows.of("monitor", 2 * 60 * 1000))
            .toStream()
            .map((k, v) -> new KeyValue<>(k.key(), v))
            .process(new MonitorProcessorSupplier(), "zkInfo");

    return builder;
}

Java Code Examples for org.apache.kafka.streams.kstream.KStreamBuilder#stream()