Java Code Examples for org.apache.samza.application.descriptors.StreamApplicationDescriptor#withDefaultSystem()
The following examples show how to use org.apache.samza.application.descriptors.StreamApplicationDescriptor#withDefaultSystem().
You can go to the original project or source file by following the links above each example.
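Before the full examples, here is a minimal sketch of the pattern they all share: create a system descriptor, register it with withDefaultSystem() before obtaining any input/output streams or tables, and the default system is then used wherever no system is named explicitly (for example, intermediate repartition streams); this roughly corresponds to setting job.default.system in configuration. The system name "kafka", the stream IDs, and the String payloads below are hypothetical, chosen only for illustration.

import org.apache.samza.application.StreamApplication;
import org.apache.samza.application.descriptors.StreamApplicationDescriptor;
import org.apache.samza.operators.MessageStream;
import org.apache.samza.operators.OutputStream;
import org.apache.samza.serializers.StringSerde;
import org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor;
import org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor;
import org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor;

// Minimal sketch only: the system name "kafka" and the stream IDs are hypothetical.
public class DefaultSystemSketch implements StreamApplication {

  @Override
  public void describe(StreamApplicationDescriptor appDescriptor) {
    // Describe the Kafka cluster that should back this application by default.
    KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor("kafka");

    // Register the default system before any streams or tables are obtained from
    // the descriptor, so it also applies to intermediate and internal streams.
    appDescriptor.withDefaultSystem(kafkaSystem);

    // Hypothetical input/output topics carrying plain String messages.
    KafkaInputDescriptor<String> input =
        kafkaSystem.getInputDescriptor("input-topic", new StringSerde());
    KafkaOutputDescriptor<String> output =
        kafkaSystem.getOutputDescriptor("output-topic", new StringSerde());

    MessageStream<String> lines = appDescriptor.getInputStream(input);
    OutputStream<String> upperCased = appDescriptor.getOutputStream(output);

    // Trivial pipeline, just to make the sketch self-contained.
    lines
        .map(String::toUpperCase)
        .sendTo(upperCased);
  }
}

The examples below follow the same shape, differing mainly in which systems, serdes, and tables they wire in.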
Example 1
Source File: RepartitionExample.java From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, MyStreamOutput>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(MyStreamOutput.class)));

  // Set "tracking" as the default system before declaring the application's streams.
  appDescriptor.withDefaultSystem(trackingSystem);
  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, MyStreamOutput>> pageViewEventPerMember = appDescriptor.getOutputStream(outputStreamDescriptor);

  // Repartition by member id, then count page views per member in 5-minute tumbling windows.
  pageViewEvents
      .partitionBy(pve -> pve.getMemberId(), pve -> pve,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)), "partitionBy")
      .window(Windows.keyedTumblingWindow(KV::getKey, Duration.ofMinutes(5), () -> 0,
          (m, c) -> c + 1, null, null), "window")
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), new MyStreamOutput(windowPane)))
      .sendTo(pageViewEventPerMember);
}
Example 2
Source File: KeyValueStoreExample.java From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  // Use the "tracking" system as the default, including for intermediate streams.
  appDescriptor.withDefaultSystem(trackingSystem);
  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, StatsOutput>> pageViewEventPerMember = appDescriptor.getOutputStream(outputStreamDescriptor);

  // Repartition by member id, then compute per-member stats with MyStatsCounter.
  pageViewEvents
      .partitionBy(pve -> pve.getMemberId(), pve -> pve,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)), "partitionBy")
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(stats -> KV.of(stats.memberId, stats))
      .sendTo(pageViewEventPerMember);
}
Example 3
Source File: FilterExample.java From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KVSerde<String, PageView> serde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> inputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, serde);
  KafkaOutputDescriptor<KV<String, PageView>> outputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, serde);

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<KV<String, PageView>> pageViews = appDescriptor.getInputStream(inputDescriptor);
  OutputStream<KV<String, PageView>> filteredPageViews = appDescriptor.getOutputStream(outputDescriptor);

  pageViews
      .filter(kv -> !INVALID_USER_ID.equals(kv.value.userId))
      .sendTo(filteredPageViews);
}
Example 4
Source File: RemoteTableJoinExample.java From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> stockSymbolInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<StockPrice> stockPriceOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, new JsonSerdeV2<>(StockPrice.class));

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<String> stockSymbolStream = appDescriptor.getInputStream(stockSymbolInputDescriptor);
  OutputStream<StockPrice> stockPriceStream = appDescriptor.getOutputStream(stockPriceOutputDescriptor);

  // Remote table backed by StockPriceReadFunction, wrapped in a caching table with a 5-second read TTL.
  RemoteTableDescriptor<String, Double> remoteTableDescriptor =
      new RemoteTableDescriptor("remote-table")
          .withReadRateLimit(10)
          .withReadFunction(new StockPriceReadFunction());
  CachingTableDescriptor<String, Double> cachedRemoteTableDescriptor =
      new CachingTableDescriptor<>("cached-remote-table", remoteTableDescriptor)
          .withReadTtl(Duration.ofSeconds(5));
  Table<KV<String, Double>> cachedRemoteTable = appDescriptor.getTable(cachedRemoteTableDescriptor);

  stockSymbolStream
      .map(symbol -> new KV<String, Void>(symbol, null))
      .join(cachedRemoteTable, new JoinFn())
      .sendTo(stockPriceStream);
}
Example 5
Source File: StreamTableJoinExample.java From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> joinResultSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> joinResultOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_TOPIC, joinResultSerde);

  RocksDbTableDescriptor<String, Profile> profileTableDescriptor =
      new RocksDbTableDescriptor<String, Profile>("profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<Profile> profileStream = appDescriptor.getInputStream(profileInputDescriptor);
  MessageStream<PageView> pageViewStream = appDescriptor.getInputStream(pageViewInputDescriptor);
  OutputStream<EnrichedPageView> joinResultStream = appDescriptor.getOutputStream(joinResultOutputDescriptor);
  Table<KV<String, Profile>> profileTable = appDescriptor.getTable(profileTableDescriptor);

  // Materialize profiles into the local RocksDB table, keyed by userId.
  profileStream
      .map(profile -> KV.of(profile.userId, profile))
      .sendTo(profileTable);

  // Repartition page views by userId, then join them against the profile table.
  pageViewStream
      .partitionBy(pv -> pv.userId, pv -> pv, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profileTable, new JoinFn())
      .sendTo(joinResultStream);
}
Example 6
Source File: AzureBlobApplication.java From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Define a system descriptor for Kafka
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<PageView> pageViewInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class));

  // Define a system descriptor for Azure Blob Storage
  GenericSystemDescriptor azureBlobSystemDescriptor =
      new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory");
  GenericOutputDescriptor<PageViewAvroRecord> azureBlobOutputDescriptor =
      azureBlobSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>());

  // Set Kafka as the default system for the job
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  // Define the input and output streams with descriptors
  MessageStream<PageView> pageViewInput = appDescriptor.getInputStream(pageViewInputDescriptor);
  OutputStream<PageViewAvroRecord> pageViewAvroRecordOutputStream = appDescriptor.getOutputStream(azureBlobOutputDescriptor);

  // Define the execution flow with the high-level API
  pageViewInput
      .map((message) -> {
        LOG.info("Sending: Received PageViewEvent with pageId: " + message.pageId);
        return PageViewAvroRecord.buildPageViewRecord(message);
      })
      .sendTo(pageViewAvroRecordOutputStream);
}
Example 7
Source File: WikipediaApplication.java From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Duration windowDuration = appDescriptor.getConfig().containsKey("deploy.test")
      ? Duration.ofMillis(10)
      : Duration.ofSeconds(10);

  // Define a SystemDescriptor for Wikipedia data
  WikipediaSystemDescriptor wikipediaSystemDescriptor = new WikipediaSystemDescriptor("irc.wikimedia.org", 6667);

  // Define InputDescriptors for consuming wikipedia data
  WikipediaInputDescriptor wikipediaInputDescriptor = wikipediaSystemDescriptor
      .getInputDescriptor("en-wikipedia")
      .withChannel(WIKIPEDIA_CHANNEL);
  WikipediaInputDescriptor wiktionaryInputDescriptor = wikipediaSystemDescriptor
      .getInputDescriptor("en-wiktionary")
      .withChannel(WIKTIONARY_CHANNEL);
  WikipediaInputDescriptor wikiNewsInputDescriptor = wikipediaSystemDescriptor
      .getInputDescriptor("en-wikinews")
      .withChannel(WIKINEWS_CHANNEL);

  // Define a system descriptor for Kafka
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Define an output descriptor
  KafkaOutputDescriptor<WikipediaStatsOutput> statsOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor("wikipedia-stats", new JsonSerdeV2<>(WikipediaStatsOutput.class));

  // Set the default system descriptor to Kafka, so that it is used for all
  // internal resources, e.g., kafka topic for checkpointing, coordinator stream.
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  MessageStream<WikipediaFeedEvent> wikipediaEvents = appDescriptor.getInputStream(wikipediaInputDescriptor);
  MessageStream<WikipediaFeedEvent> wiktionaryEvents = appDescriptor.getInputStream(wiktionaryInputDescriptor);
  MessageStream<WikipediaFeedEvent> wikiNewsEvents = appDescriptor.getInputStream(wikiNewsInputDescriptor);
  OutputStream<WikipediaStatsOutput> wikipediaStats = appDescriptor.getOutputStream(statsOutputDescriptor);

  // Merge inputs
  MessageStream<WikipediaFeedEvent> allWikipediaEvents =
      MessageStream.mergeAll(ImmutableList.of(wikipediaEvents, wiktionaryEvents, wikiNewsEvents));

  // Parse, update stats, prepare output, and send
  allWikipediaEvents
      .map(WikipediaParser::parseEvent)
      .window(Windows.tumblingWindow(windowDuration, WikipediaStats::new,
          new WikipediaStatsAggregator(), WikipediaStats.serde()), "statsWindow")
      .map(this::formatOutput)
      .sendTo(wikipediaStats);
}
Example 8
Source File: CouchbaseTableExample.java From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor app) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> wordInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<String> countOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, new StringSerde());

  // Write function that stores data in a Couchbase bucket.
  MyCouchbaseTableWriteFunction writeFn = new MyCouchbaseTableWriteFunction(BUCKET_NAME, CLUSTER_NODES)
      .withBootstrapCarrierDirectPort(COUCHBASE_PORT)
      .withUsernameAndPassword(BUCKET_NAME, BUCKET_PASSWORD)
      .withTimeout(Duration.ofSeconds(5));

  TableRetryPolicy retryPolicy = new TableRetryPolicy()
      .withFixedBackoff(Duration.ofSeconds(1))
      .withStopAfterAttempts(3);

  // Remote table that writes to Couchbase with a retry policy and write rate limit; reads are no-ops.
  RemoteTableDescriptor couchbaseTableDescriptor = new RemoteTableDescriptor("couchbase-table")
      .withReadFunction(new NoOpTableReadFunction())
      .withReadRateLimiterDisabled()
      .withWriteFunction(writeFn)
      .withWriteRetryPolicy(retryPolicy)
      .withWriteRateLimit(4);

  app.withDefaultSystem(kafkaSystemDescriptor);

  MessageStream<String> wordStream = app.getInputStream(wordInputDescriptor);
  OutputStream<String> countStream = app.getOutputStream(countOutputDescriptor);

  // Register the remote table with the application.
  app.getTable(couchbaseTableDescriptor);

  wordStream
      .flatMap(m -> Arrays.asList(m.split(" ")))
      .filter(word -> word != null && word.length() > 0)
      .map(new MyCountFunction())
      .map(countString -> currentTime() + " " + countString)
      .sendTo(countStream);
}