Java Code Examples for org.apache.kafka.streams.kstream.KStreamBuilder#stream()
The following examples show how to use
org.apache.kafka.streams.kstream.KStreamBuilder#stream().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KafkaStreamWordCount.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License | 6 votes |
/**
 * Runs a word-count topology: consumes text from the "input" topic, splits every
 * value into lower-cased words, counts occurrences per word and publishes the
 * running counts to the "wordCount" topic.
 *
 * @param args not used
 * @throws Exception propagated from any call in the body
 */
public static void main(String[] args) throws Exception {
    final String defaultSerdeClass = Serdes.String().getClass().getName();

    Properties kafkaStreamProperties = new Properties();
    kafkaStreamProperties.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-stream-wordCount");
    kafkaStreamProperties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    kafkaStreamProperties.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    kafkaStreamProperties.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, defaultSerdeClass);
    kafkaStreamProperties.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, defaultSerdeClass);

    Serde<String> textSerde = Serdes.String();
    Serde<Long> countSerde = Serdes.Long();
    KStreamBuilder streamTopology = new KStreamBuilder();

    KStream<String, String> lines = streamTopology.stream(textSerde, textSerde, "input");
    KStream<String, String> words =
        lines.flatMapValues(line -> Arrays.asList(line.toLowerCase().split("\\W+")));
    // Re-key every record by the word itself so that counting by key counts words.
    KStream<String, String> keyedByWord =
        words.map((ignoredKey, word) -> new KeyValue<>(word, word));
    KStream<String, Long> countsPerWord = keyedByWord.countByKey("Count").toStream();
    countsPerWord.to(textSerde, countSerde, "wordCount");

    KafkaStreams streamManager = new KafkaStreams(streamTopology, kafkaStreamProperties);
    streamManager.start();
    Runtime.getRuntime().addShutdownHook(new Thread(streamManager::close));
}
Example 2
Source File: IPFraudKafkaStreamApp.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License | 6 votes |
/**
 * Runs the fraud-detection topology: reads records from the topic named by the
 * "topic" property, keeps only those whose value {@code isFraud} accepts, and
 * writes them to the topic named by the "output_topic" property.
 *
 * @param args not used
 * @throws Exception propagated from any call in the body
 */
public static void main(String[] args) throws Exception {
    final String defaultSerdeClass = Serdes.String().getClass().getName();

    Properties kafkaStreamProperties = new Properties();
    kafkaStreamProperties.put(StreamsConfig.APPLICATION_ID_CONFIG, "IP-Fraud-Detection");
    kafkaStreamProperties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    kafkaStreamProperties.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    kafkaStreamProperties.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, defaultSerdeClass);
    kafkaStreamProperties.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, defaultSerdeClass);

    Serde<String> textSerde = Serdes.String();
    KStreamBuilder fraudDetectionTopology = new KStreamBuilder();

    String sourceTopic = propertyReader.getPropertyValue("topic");
    KStream<String, String> ipRecords =
        fraudDetectionTopology.stream(textSerde, textSerde, sourceTopic);

    // Only the record value is inspected; the key is passed through untouched.
    KStream<String, String> fraudIpRecords = ipRecords.filter((key, record) -> isFraud(record));

    String sinkTopic = propertyReader.getPropertyValue("output_topic");
    fraudIpRecords.to(sinkTopic);

    KafkaStreams streamManager = new KafkaStreams(fraudDetectionTopology, kafkaStreamProperties);
    streamManager.start();
    Runtime.getRuntime().addShutdownHook(new Thread(streamManager::close));
}
Example 3
Source File: KafkaStreamingMain.java From kafka-streams-api-websockets with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { Properties props = new Properties(); props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streaming-example"); props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1500); // To get data produced before process started // props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); // props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); KStreamBuilder builder = new KStreamBuilder(); KStream<String, String> source = builder.stream("data-in"); KStream<String, String> stats = source.groupByKey() .aggregate(KafkaStreamingStatistics::new, (k, v, clusterstats) -> clusterstats.add(v), TimeWindows.of(60000).advanceBy(10000), Serdes.serdeFrom(new MySerde(), new MySerde()), "data-store") .toStream((key, value) -> key.key().toString() + " " + key.window().start()) .mapValues((job) -> job.computeAvgTime().toString()); stats.to(Serdes.String(), Serdes.String(), "data-out"); KafkaStreams streams = new KafkaStreams(builder, props); streams.cleanUp(); streams.start(); Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); }
Example 4
Source File: KafkaStreamsLiveTest.java From tutorials with MIT License | 5 votes |
@Test @Ignore("it needs to have kafka broker running on local") public void shouldTestKafkaStreams() throws InterruptedException { //given String inputTopic = "inputTopic"; Properties streamsConfiguration = new Properties(); streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-live-test"); streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); // Use a temporary directory for storing state, which will be automatically removed after the test. streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath()); //when KStreamBuilder builder = new KStreamBuilder(); KStream<String, String> textLines = builder.stream(inputTopic); Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); KTable<String, Long> wordCounts = textLines .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))) .groupBy((key, word) -> word) .count(); wordCounts.foreach((word, count) -> System.out.println("word: " + word + " -> " + count)); String outputTopic = "outputTopic"; final Serde<String> stringSerde = Serdes.String(); final Serde<Long> longSerde = Serdes.Long(); wordCounts.to(stringSerde, longSerde, outputTopic); KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); streams.start(); //then Thread.sleep(30000); streams.close(); }
Example 5
Source File: ExclamationKafkaStream.java From kafka-streams-ex with MIT License | 5 votes |
/** Connects the topic "console" with the topic "exclaimed", adding two * exclamation points to the input values. * * @param args Not used. */ public static void main(String[] args) { // Configuration stuff. Properties config = new Properties(); // For the cluster. Assumes everything is local. config.put(StreamsConfig.APPLICATION_ID_CONFIG, "exclamation-kafka-streams"); config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181"); // Serde. config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName()); config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); KStreamBuilder builder = new KStreamBuilder(); // Read the stream from the topic into a KStream. KStream<byte[], String> text = builder.stream("console"); // Apply the transformation. KStream<byte[], String> exclamation = text.mapValues(x -> x + "!") .mapValues(x -> x + "!"); // Sink it. Uses the configured serializers. exclamation.to("exclamated"); // Build and run. KafkaStreams streams = new KafkaStreams(builder, config); streams.start(); }
Example 6
Source File: StocksKafkaStreamsDriver.java From kafka-streams with Apache License 2.0 | 5 votes |
/**
 * Builds and starts the stock-transaction topology: reads "stocks", re-keys each
 * transaction by its symbol, passes the stream through "stocks-out", aggregates
 * transactions per symbol into a {@code StockTransactionCollector} over
 * {@code TimeWindows.of(10000)} windows (store "stock-summaries"), and writes the
 * result to "transaction-summary".
 *
 * @param args not used
 */
public static void main(String[] args) {
    StreamsConfig streamingConfig = new StreamsConfig(getProperties());

    // String serde, plus the windowed variant used for the aggregate's keys.
    StringSerializer stringSerializer = new StringSerializer();
    StringDeserializer stringDeserializer = new StringDeserializer();
    Serde<String> stringSerde = Serdes.serdeFrom(stringSerializer, stringDeserializer);
    WindowedSerializer<String> windowedSerializer = new WindowedSerializer<>(stringSerializer);
    WindowedDeserializer<String> windowedDeserializer =
        new WindowedDeserializer<>(stringDeserializer);
    Serde<Windowed<String>> windowedSerde =
        Serdes.serdeFrom(windowedSerializer, windowedDeserializer);

    // JSON serde for individual transactions.
    JsonSerializer<StockTransaction> stockTxnJsonSerializer = new JsonSerializer<>();
    JsonDeserializer<StockTransaction> stockTxnDeserializer =
        new JsonDeserializer<>(StockTransaction.class);
    Serde<StockTransaction> transactionSerde =
        Serdes.serdeFrom(stockTxnJsonSerializer, stockTxnDeserializer);

    // JSON serde for the per-window collector.
    JsonSerializer<StockTransactionCollector> stockTransactionsSerializer = new JsonSerializer<>();
    JsonDeserializer<StockTransactionCollector> stockTransactionsDeserializer =
        new JsonDeserializer<>(StockTransactionCollector.class);
    Serde<StockTransactionCollector> collectorSerde =
        Serdes.serdeFrom(stockTransactionsSerializer, stockTransactionsDeserializer);

    KStreamBuilder kStreamBuilder = new KStreamBuilder();
    KStream<String, StockTransaction> transactionKStream =
        kStreamBuilder.stream(stringSerde, transactionSerde, "stocks");

    transactionKStream
        .map((ignoredKey, txn) -> new KeyValue<>(txn.getSymbol(), txn))
        .through(stringSerde, transactionSerde, "stocks-out")
        .groupBy((symbol, txn) -> symbol, stringSerde, transactionSerde)
        .aggregate(
            StockTransactionCollector::new,
            (symbol, txn, collector) -> collector.add(txn),
            TimeWindows.of(10000),
            collectorSerde,
            "stock-summaries")
        .to(windowedSerde, collectorSerde, "transaction-summary");

    System.out.println("Starting StockStreams Example");
    KafkaStreams kafkaStreams = new KafkaStreams(kStreamBuilder, streamingConfig);
    kafkaStreams.start();
    System.out.println("Now started StockStreams Example");
}
Example 7
Source File: TwitterStreamsAnalyzer.java From kafka-streams with Apache License 2.0 | 5 votes |
/**
 * Builds and starts the tweet-routing topology: reads "twitterData", re-keys each
 * tweet by the language the classifier assigns to its text ("unknown" when the
 * text is blank), and routes tweets keyed "english", "french" and "spanish" to the
 * topics of the same names. The started instance is stored in {@code kafkaStreams}.
 */
public void run() {
    StreamsConfig streamsConfig = new StreamsConfig(getProperties());

    JsonSerializer<Tweet> tweetJsonSerializer = new JsonSerializer<>();
    JsonDeserializer<Tweet> tweetJsonDeserializer = new JsonDeserializer<>(Tweet.class);
    Serde<Tweet> tweetSerde = Serdes.serdeFrom(tweetJsonSerializer, tweetJsonDeserializer);

    KStreamBuilder kStreamBuilder = new KStreamBuilder();

    Classifier classifier = new Classifier();
    classifier.train(new File("src/main/resources/kafkaStreamsTwitterTrainingData_clean.csv"));

    // New key = detected language of the tweet text.
    KeyValueMapper<String, Tweet, String> languageToKey = (key, tweet) -> {
        if (StringUtils.isNotBlank(tweet.getText())) {
            return classifier.classify(tweet.getText());
        }
        return "unknown";
    };

    Predicate<String, Tweet> isEnglish = (language, tweet) -> language.equals("english");
    Predicate<String, Tweet> isFrench = (language, tweet) -> language.equals("french");
    Predicate<String, Tweet> isSpanish = (language, tweet) -> language.equals("spanish");

    KStream<String, Tweet> tweetKStream =
        kStreamBuilder.stream(Serdes.String(), tweetSerde, "twitterData");
    KStream<String, Tweet>[] filteredStreams =
        tweetKStream.selectKey(languageToKey).branch(isEnglish, isFrench, isSpanish);

    // Branch i was produced by predicate i, so the topic names follow the same order.
    String[] languageTopics = {"english", "french", "spanish"};
    for (int branch = 0; branch < languageTopics.length; branch++) {
        filteredStreams[branch].to(Serdes.String(), tweetSerde, languageTopics[branch]);
    }

    kafkaStreams = new KafkaStreams(kStreamBuilder, streamsConfig);
    System.out.println("Starting twitter analysis streams");
    kafkaStreams.start();
    System.out.println("Started");
}
Example 8
Source File: CommandProcessor.java From cqrs-manager-for-distributed-reactive-services with Apache License 2.0 | 5 votes |
/**
 * Builds and starts the command-processing topology.
 *
 * <p>Commands read from {@code commandsTopic} whose "action" is "create-customer"
 * are turned into "customer-created" events (with the command id stored under
 * "parent" and a fresh id put into the "data" map) and sent through
 * {@code eventsTopic}. The "data" map of every event is then re-keyed by its "id",
 * sent through {@code customersTopic}, and handed to {@code customerStore} together
 * with the "Customers" state store. The started instance is kept in
 * {@code this.kafkaStreams}.
 */
public void start() {
    KStreamBuilder builder = new KStreamBuilder();
    Serde<UUID> keySerde = new FressianSerde();
    Serde<Map> valSerde = new FressianSerde();

    KStream<UUID, Map> commands = builder.stream(keySerde, valSerde, commandsTopic);

    KStream<UUID, Map> createCustomerCommands = commands.filter((commandId, command) ->
        command.get(new Keyword("action")).equals(new Keyword("create-customer")));

    KStream<UUID, Map> customerEvents = createCustomerCommands
        .map((commandId, command) -> {
            logger.debug("Command received");
            // Copy the command so the event can be modified independently of it;
            // the nested "data" map is still shared with the command.
            Map userEvent = new HashMap(command);
            userEvent.put(new Keyword("action"), new Keyword("customer-created"));
            userEvent.put(new Keyword("parent"), commandId);
            Map userValue = (Map) userEvent.get(new Keyword("data"));
            userValue.put(new Keyword("id"), UUID.randomUUID());
            return new KeyValue<>(UUID.randomUUID(), userEvent);
        })
        .through(keySerde, valSerde, eventsTopic);

    KStream<UUID, Map> customers = customerEvents.map((eventId, event) -> {
        Map customer = (Map) event.get(new Keyword("data"));
        UUID customerId = (UUID) customer.get(new Keyword("id"));
        return new KeyValue<UUID, Map>(customerId, customer);
    });

    // The stream returned by through() is not used; processing continues on `customers`.
    customers.through(keySerde, valSerde, customersTopic);

    StateStoreSupplier store = Stores.create("Customers")
        .withKeys(keySerde)
        .withValues(valSerde)
        .persistent()
        .build();
    builder.addStateStore(store);

    customers.process(customerStore, "Customers");

    this.kafkaStreams = new KafkaStreams(builder, kafkaStreamsConfig);
    this.kafkaStreams.start();
}
Example 9
Source File: WikipediaStreamDemo.java From hello-kafka-streams with Apache License 2.0 | 5 votes |
private static KafkaStreams createWikipediaStreamsInstance(String bootstrapServers) { final Serializer<JsonNode> jsonSerializer = new JsonSerializer(); final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer(); final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer); KStreamBuilder builder = new KStreamBuilder(); Properties props = new Properties(); props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wikipedia-streams"); props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); KStream<JsonNode, JsonNode> wikipediaRaw = builder.stream(jsonSerde, jsonSerde, "wikipedia-raw"); KStream<String, WikipediaMessage> wikipediaParsed = wikipediaRaw.map(WikipediaMessage::parceIRC) .filter(WikipediaMessage::filterNonNull) .through(Serdes.String(), new JsonPOJOSerde<>(WikipediaMessage.class), "wikipedia-parsed"); KTable<String, Long> totalEditsByUser = wikipediaParsed .filter((key, value) -> value.type == WikipediaMessage.Type.EDIT) .countByKey(Serdes.String(), "wikipedia-edits-by-user"); //some print totalEditsByUser.toStream().process(() -> new AbstractProcessor<String, Long>() { @Override public void process(String user, Long numEdits) { System.out.println("USER: " + user + " num.edits: " + numEdits); } }); return new KafkaStreams(builder, props); }
Example 10
Source File: Stream.java From hdinsight-kafka-java-get-started with MIT License | 5 votes |
public static void main( String[] args ) { Properties streamsConfig = new Properties(); // The name must be unique on the Kafka cluster streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-example"); // Brokers streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, args[0]); // Zookeeper //streamsConfig.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, args[1]); // SerDes for key and values streamsConfig.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfig.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); // Serdes for the word and count Serde<String> stringSerde = Serdes.String(); Serde<Long> longSerde = Serdes.Long(); KStreamBuilder builder = new KStreamBuilder(); KStream<String, String> sentences = builder.stream(stringSerde, stringSerde, "test"); KStream<String, Long> wordCounts = sentences .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+"))) .map((key, word) -> new KeyValue<>(word, word)) .countByKey("Counts") .toStream(); wordCounts.to(stringSerde, longSerde, "wordcounts"); KafkaStreams streams = new KafkaStreams(builder, streamsConfig); streams.start(); Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); }
Example 11
Source File: RegexTest.java From kafka-streams with Apache License 2.0 | 4 votes |
/**
 * Starts a topology that subscribes to topics by regular expression and by name,
 * printing every record of each subscription with its own label: topics matching
 * {@code topic-\d}, the topic "topic-Z", and topics matching {@code topic-[A-Y]+}.
 *
 * @param args not used
 */
public static void main(String[] args) {
    StreamsConfig streamingConfig = new StreamsConfig(getProperties());
    KStreamBuilder kStreamBuilder = new KStreamBuilder();

    Pattern digitTopics = Pattern.compile("topic-\\d");
    KStream<String, String> patternStreamI =
        kStreamBuilder.stream(Serdes.String(), Serdes.String(), digitTopics);

    KStream<String, String> namedTopicKStream =
        kStreamBuilder.stream(Serdes.String(), Serdes.String(), "topic-Z");

    Pattern letterTopics = Pattern.compile("topic-[A-Y]+");
    KStream<String, String> patternStreamII =
        kStreamBuilder.stream(Serdes.String(), Serdes.String(), letterTopics);

    patternStreamI.print("pattern-\\d");
    namedTopicKStream.print("topic-Z");
    patternStreamII.print("topic-[A-Y]+");

    System.out.println("Starting stream regex consumer Example");
    KafkaStreams kafkaStreams = new KafkaStreams(kStreamBuilder, streamingConfig);
    kafkaStreams.start();
}
Example 12
Source File: NotLookingAtFacebook.java From kafka-streams-ex with MIT License | 4 votes |
/** Runs the streams program, writing to the "notifications" and * "metrics" topics. * * @param args Not used. */ public static void main(String[] args) throws Exception { Properties config = new Properties(); config.put(StreamsConfig.APPLICATION_ID_CONFIG, "not-looking-at-facebook"); config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181"); config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); Properties producerConfig = new Properties(); producerConfig.put("bootstrap.servers", "localhost:9092"); producerConfig.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); producerConfig.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); String[] users = {"Doyin", "George", "Mark"}; // Build the topology. KStreamBuilder builder = new KStreamBuilder(); KTable<String, String> logons = builder.table("logons"); KStream<String, String> ticks = builder.stream("ticks"); KStream<String, String> notifications = ticks.leftJoin(logons, (nv, lv) -> new String[] {nv, lv}) // Filter out any nulls. .filter((k,v) -> v[1] != null) // Filter out anyone who's logged on. .filter((k,v) -> v[1] != "LOGON") // Now set the message. .mapValues(v -> "You are not currently viewing Facebook."); // Implement the metrics. KTable<Windowed<String>, Long> notificationCounts = notifications.countByKey( // Create a one minute window. TimeWindows.of("notificationCounts", 60000L) // Hop by ten seconds. .advanceBy(10000L) // Don't hang on to old values. .until(60000L)); // Convert notificationCounts to a stream, extract the key (ignore // the embedded time information), and sink to the "metrics" topic. notificationCounts.toStream((k,v) -> k.key()) .to(Serdes.String(), Serdes.Long(), "metrics"); // Nuisance delivered. You're welcome. 
notifications.to("notifications"); // Start producing logon messages. new Thread(new LogonGenerator(users, producerConfig)).start(); // Start producing notifications. new Thread(new TickGenerator(users, producerConfig)).start(); // Start the streams. KafkaStreams streams = new KafkaStreams(builder, config); streams.start(); }
Example 13
Source File: HoppingWindowKafkaStream.java From kafka-streams-ex with MIT License | 4 votes |
/** Runs the streams program, writing to the "long-counts-all" topic. * * @param args Not used. */ public static void main(String[] args) throws Exception{ Properties config = new Properties(); config.put(StreamsConfig.APPLICATION_ID_CONFIG, "hopping-window-kafka-streams"); config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181"); config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName()); config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); KStreamBuilder builder = new KStreamBuilder(); KStream<byte[], Long> longs = builder.stream( Serdes.ByteArray(), Serdes.Long(), "longs"); // The hopping windows will count the last second, two seconds, // three seconds, etc until the last ten seconds of data are in the // windows. KTable<Windowed<byte[]>, Long> longCounts = longs.groupByKey() .count(TimeWindows.of(10000L) .advanceBy(1000L) .until(10000L), "long-counts"); // Write to output topic. longCounts.toStream((k,v) -> k.key()) .map((k,v) -> KeyValue.pair(k, v)) .to(Serdes.ByteArray(), Serdes.Long(), "long-counts-all"); KafkaStreams streams = new KafkaStreams(builder, config); streams.start(); // Now generate the data and write to the topic. Properties producerConfig = new Properties(); producerConfig.put("bootstrap.servers", "localhost:9092"); producerConfig.put("key.serializer", "org.apache.kafka.common" + ".serialization.ByteArraySerializer"); producerConfig.put("value.serializer", "org.apache.kafka.common" + ".serialization.LongSerializer"); KafkaProducer producer = new KafkaProducer<byte[], Long>(producerConfig); Random rng = new Random(12345L); while(true) { producer.send(new ProducerRecord<byte[], Long>( "longs", "A".getBytes(), rng.nextLong()%10)); Thread.sleep(500L); } // Close infinite loop generating data. }
Example 14
Source File: ExclamationAdvancedKafkaStream.java From kafka-streams-ex with MIT License | 4 votes |
/** Connects the topic "console" to two topics, adds 2-4 exclamation points, * writing all messages to the "exclamated" topic and the messages with * four exclamation points to the "much-exclamated" topic. * * @param args Not used. */ public static void main(String[] args) { // Configuration stuff. Properties config = new Properties(); // For the cluster. Assumes everything is local. config.put(StreamsConfig.APPLICATION_ID_CONFIG, "exclamation-advanced-kafka-streams"); config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181"); // Serde. config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName()); config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); KStreamBuilder builder = new KStreamBuilder(); // Read the stream from the topic into a KStream. KStream<byte[], String> text = builder.stream("console"); // Apply the transformations. KStream<byte[], String> exclamation = text.mapValues(x -> x + getExclamations()) .mapValues(x -> x + getExclamations()); KStream<byte[], String> muchExclamation = exclamation.filter((k,v) -> v.endsWith("!!!!")); // Sink them both. exclamation.to("exclamated"); muchExclamation.to("much-exclamated"); // Build and run. KafkaStreams streams = new KafkaStreams(builder, config); streams.start(); }
Example 15
Source File: TumblingWindowKafkaStream.java From kafka-streams-ex with MIT License | 4 votes |
/** Runs the streams program, writing to the "long-counts-all" topic. * * @param args Not used. */ public static void main(String[] args) throws Exception { Properties config = new Properties(); config.put(StreamsConfig.APPLICATION_ID_CONFIG, "tumbling-window-kafka-streams"); config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181"); config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName()); config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName()); KStreamBuilder builder = new KStreamBuilder(); KStream<byte[], Long> longs = builder.stream( Serdes.ByteArray(), Serdes.Long(), "longs"); // The tumbling windows will clear every ten seconds. KTable<Windowed<byte[]>, Long> longCounts = longs.groupByKey() .count(TimeWindows.of(10000L) .until(10000L), "long-counts"); // Write to topics. longCounts.toStream((k,v) -> k.key()) .to(Serdes.ByteArray(), Serdes.Long(), "long-counts-all"); KafkaStreams streams = new KafkaStreams(builder, config); streams.start(); // Now generate the data and write to the topic. Properties producerConfig = new Properties(); producerConfig.put("bootstrap.servers", "localhost:9092"); producerConfig.put("key.serializer", "org.apache.kafka.common" + ".serialization.ByteArraySerializer"); producerConfig.put("value.serializer", "org.apache.kafka.common" + ".serialization.LongSerializer"); KafkaProducer producer = new KafkaProducer<byte[], Long>(producerConfig); Random rng = new Random(12345L); while(true) { producer.send(new ProducerRecord<byte[], Long>( "longs", "A".getBytes(), rng.nextLong()%10)); Thread.sleep(500L); } // Close infinite data generating loop. }
Example 16
Source File: WordCountExample.java From kafka-streams-wordcount with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception{ Properties props = new Properties(); props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount"); props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data // Note: To re-run the demo, you need to use the offset reset tool: // https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); // work-around for an issue around timing of creating internal topics // Fixed in Kafka 0.10.2.0 // don't use in large production apps - this increases network load // props.put(CommonClientConfigs.METADATA_MAX_AGE_CONFIG, 500); KStreamBuilder builder = new KStreamBuilder(); KStream<String, String> source = builder.stream("wordcount-input"); final Pattern pattern = Pattern.compile("\\W+"); KStream counts = source.flatMapValues(value-> Arrays.asList(pattern.split(value.toLowerCase()))) .map((key, value) -> new KeyValue<Object, Object>(value, value)) .filter((key, value) -> (!value.equals("the"))) .groupByKey() .count("CountStore").mapValues(value->Long.toString(value)).toStream(); counts.to("wordcount-output"); KafkaStreams streams = new KafkaStreams(builder, props); // This is for reset to work. Don't use in production - it causes the app to re-load the state from Kafka on every start streams.cleanUp(); streams.start(); // usually the stream application would be running forever, // in this example we just let it run for some time and stop since the input data is finite. Thread.sleep(5000L); streams.close(); }
Example 17
Source File: StatKStreamBuilderSupplier.java From DBus with Apache License 2.0 | 4 votes |
@Override public KStreamBuilder get() { KStreamBuilder builder = new KStreamBuilder(); KStream<String, String> stream = builder.stream((String[]) sources.toArray()); KStream<HBKeySupplier.HBKey, String>[] streams = stream.filter((k, v) -> StringUtils.startsWith(v, "data_increment_heartbeat")) .selectKey((k, v) -> new HBKeySupplier(k).get()) .filter((k, v) -> k.isNormalFormat) .flatMapValues(v -> Arrays.asList("stat", "monitor")) .branch((k, v) -> StringUtils.equalsIgnoreCase("stat", v), (k, v) -> StringUtils.equalsIgnoreCase("monitor", v)); streams[0].transform(StatTransformer::new).to(sink); KStream<String, PacketVo> monitor = streams[1].filterNot((k, v) -> !StringUtils.equalsIgnoreCase("abort", k.getStatus())) .map((k, v) -> { StringJoiner joiner = new StringJoiner("/"); joiner.add(HeartBeatConfigContainer.getInstance().getHbConf().getMonitorPath()) .add(k.getDs()) .add(StringUtils.equalsIgnoreCase(DbusDatasourceType.DB2.name(), k.getDbType()) ? StringUtils.upperCase(k.getSchema()) : k.getSchema()) .add(k.getTable()).add(String.valueOf(k.getPartition())); String node = joiner.toString(); PacketVo packet = new PacketVo(); packet.setNode(node); packet.setType("checkpoint"); packet.setTime(k.getCheckpointMs()); packet.setTxTime(k.getTxTimeMs()); return new KeyValue(node, packet); }); // 需要先进行shuff把key相同的分配到partition号 monitor.through("monitor-repartition") .reduceByKey((agg, v) -> v.getTime() > agg.getTime() ? v : agg, TimeWindows.of("monitor", 2 * 60 * 1000)) .toStream() .map((k, v) -> new KeyValue<>(k.key(), v)) .process(new MonitorProcessorSupplier(), "zkInfo"); return builder; }