org.apache.samza.operators.MessageStream Java Examples
The following examples show how to use
org.apache.samza.operators.MessageStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 6 votes |
/**
 * Creates the following stream-stream join, which is invalid because the two input streams
 * disagree on partition count.
 *
 *   input1 (64) --
 *                 |
 *                join -> output1 (8)
 *                 |
 *   input3 (32) --
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamStreamJoin() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> leftInput = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> rightInput = appDesc.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = appDesc.getOutputStream(output1Descriptor);

    // The join function and all three serdes are mocks; only the graph shape is being built here.
    leftInput
        .join(rightInput, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(2), "j1")
        .sendTo(joinOutput);
  }, config);
}
Example #2
Source File: TestWindowOperator.java From samza with Apache License 2.0 | 6 votes |
/**
 * Builds an application descriptor that reads the "integers" stream of the "kafka" system, maps
 * each message with {@code KVMapFunction}, counts messages per tumbling window, and sinks every
 * window result to the "outputSystem"/"outputStream" system stream.
 *
 * @param mode accumulation mode applied to the window
 * @param timeDuration length of the tumbling window
 * @param earlyTrigger early trigger applied to the window
 * @return the descriptor wrapping the application described above
 */
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode,
    Duration timeDuration, Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  StreamApplication userApp = appDesc -> {
    GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> integerPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersDescriptor =
        systemDescriptor.getInputDescriptor("integers", integerPairSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersDescriptor);

    integers
        .map(new KVMapFunction())
        // Fold starts at 0 and adds one per message, i.e. a per-window message count.
        .window(Windows.<IntegerEnvelope, Integer>tumblingWindow(timeDuration, () -> 0,
                (msg, count) -> count + 1, new IntegerSerde())
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(new OutgoingMessageEnvelope(new SystemStream("outputSystem", "outputStream"), pane)));
  };

  return new StreamApplicationDescriptorImpl(userApp, config);
}
Example #3
Source File: TumblingWindowApp.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read page views, drop those whose user id equals {@code FILTER_KEY},
 * group the rest per user id into 3-second tumbling windows, and emit (user id, window size).
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput =
      kafkaSystem.getInputDescriptor(INPUT_TOPIC, new JsonSerdeV2<>(PageView.class));
  KafkaOutputDescriptor<KV<String, Integer>> countOutput =
      kafkaSystem.getOutputDescriptor(OUTPUT_TOPIC, KVSerde.of(new StringSerde(), new IntegerSerde()));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, Integer>> userCounts = appDescriptor.getOutputStream(countOutput);

  pageViews
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(Windows.keyedTumblingWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "tumblingWindow")
      // Window key wraps the user id; the pane message is the collection of page views in the window.
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(userCounts);
}
Example #4
Source File: SessionWindowApp.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read page views, drop those whose user id equals {@code FILTER_KEY},
 * group the rest per user id into session windows with a 3-second gap, and emit
 * (user id, window size).
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput =
      kafkaSystem.getInputDescriptor(INPUT_TOPIC, new JsonSerdeV2<>(PageView.class));
  KafkaOutputDescriptor<KV<String, Integer>> countOutput =
      kafkaSystem.getOutputDescriptor(OUTPUT_TOPIC, KVSerde.of(new StringSerde(), new IntegerSerde()));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, Integer>> userCounts = appDescriptor.getOutputStream(countOutput);

  pageViews
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "sessionWindow")
      // Window key wraps the user id; the pane message is the collection of page views in the session.
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(userCounts);
}
Example #5
Source File: AsyncApplicationExample.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read "adClickEvent" from the "tracking" system, enrich each event
 * through {@code AsyncApplicationExample::enrichAdClickEvent}, key the result by country, and
 * write it to "enrichedAdClickEvent".
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<AdClickEvent> clicksDescriptor =
      tracking.getInputDescriptor("adClickEvent", new JsonSerdeV2<>(AdClickEvent.class));
  KafkaOutputDescriptor<KV<String, EnrichedAdClickEvent>> enrichedDescriptor =
      tracking.getOutputDescriptor("enrichedAdClickEvent",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(EnrichedAdClickEvent.class)));

  MessageStream<AdClickEvent> clicks = appDescriptor.getInputStream(clicksDescriptor);
  OutputStream<KV<String, EnrichedAdClickEvent>> enrichedClicks =
      appDescriptor.getOutputStream(enrichedDescriptor);

  clicks
      .flatMapAsync(AsyncApplicationExample::enrichAdClickEvent)
      .map(enriched -> KV.of(enriched.getCountry(), enriched))
      .sendTo(enrichedClicks);
}
Example #6
Source File: BroadcastExample.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read "pageViewEvent" from the "tracking" system and fan the same input
 * out to three output streams, each receiving only the messages whose key matches "key1", "key2"
 * or "key3" respectively.
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> serde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
  KafkaInputDescriptor<KV<String, PageViewEvent>> pageViewEvent =
      trackingSystem.getInputDescriptor("pageViewEvent", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream1 =
      trackingSystem.getOutputDescriptor("outStream1", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream2 =
      trackingSystem.getOutputDescriptor("outStream2", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream3 =
      trackingSystem.getOutputDescriptor("outStream3", serde);

  MessageStream<KV<String, PageViewEvent>> inputStream = appDescriptor.getInputStream(pageViewEvent);

  // Constant-first equals: a message with a null key is filtered out instead of throwing a
  // NullPointerException inside the filter function.
  inputStream.filter(m -> "key1".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream1));
  inputStream.filter(m -> "key2".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream2));
  inputStream.filter(m -> "key3".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream3));
}
Example #7
Source File: WindowExample.java From samza with Apache License 2.0 | 6 votes |
@Override public void describe(StreamApplicationDescriptor appDescriptor) { KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking"); KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor = trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class)); KafkaOutputDescriptor<Integer> outputStreamDescriptor = trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde()); SupplierFunction<Integer> initialValue = () -> 0; FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1; MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor); OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor); // create a tumbling window that outputs the number of message collected every 10 minutes. // also emit early results if either the number of messages collected reaches 30000, or if no new messages arrive // for 1 minute. inputStream .window(Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde()) .setLateTrigger(Triggers.any(Triggers.count(30000), Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))), "window") .map(WindowPane::getMessage) .sendTo(outputStream); }
Example #8
Source File: TestSchedulingApp.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read {@code PAGE_VIEWS} from the "kafka" system, run each page view
 * through {@code FlatmapScheduledFn}, and assert that the output holds the four expected
 * "-complete" page views in any order.
 *
 * @param appDescriptor descriptor on which the input stream and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor("kafka");
  KafkaInputDescriptor<PageView> pageViewInput = kafkaSystem.getInputDescriptor(PAGE_VIEWS, pageViewSerde);

  MessageStream<PageView> scheduledOutput = appDescriptor
      .getInputStream(pageViewInput)
      .flatMap(new FlatmapScheduledFn());

  MessageStreamAssert
      .that("Output from scheduling function should container all complete messages", scheduledOutput, pageViewSerde)
      .containsInAnyOrder(Arrays.asList(
          new PageView("v1-complete", "p1", "u1"),
          new PageView("v2-complete", "p2", "u1"),
          new PageView("v3-complete", "p1", "u2"),
          new PageView("v4-complete", "p3", "u2")));
}
Example #9
Source File: WindowAssignTranslator.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates a portable window-into transform: decodes its {@code WindowIntoPayload}, rebuilds
 * the window function from it, applies a {@code WindowAssignOp} to the transform's single input
 * stream, and registers the result under the transform's output id.
 *
 * @param transform the pipeline node being translated
 * @param pipeline the pipeline containing the node (not used by this method)
 * @param ctx translation context supplying the input stream and receiving the output stream
 * @throws IllegalArgumentException if the payload cannot be parsed
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final RunnerApi.WindowIntoPayload payload = parseWindowIntoPayload(transform);

  @SuppressWarnings("unchecked")
  final WindowFn<T, ?> windowFn =
      (WindowFn<T, ?>) WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn());

  final MessageStream<OpMessage<T>> windowedStream =
      ctx.<T>getOneInputMessageStream(transform)
          .flatMap(OpAdapter.adapt(new WindowAssignOp<>(windowFn)));

  ctx.registerMessageStream(ctx.getOutputId(transform), windowedStream);
}

/** Decodes the transform's spec payload, rethrowing a parse failure as IllegalArgumentException. */
private static RunnerApi.WindowIntoPayload parseWindowIntoPayload(PipelineNode.PTransformNode transform) {
  try {
    return RunnerApi.WindowIntoPayload.parseFrom(transform.getTransform().getSpec().getPayload());
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(
        String.format("failed to parse WindowIntoPayload: %s", transform.getId()), e);
  }
}
Example #10
Source File: FlattenPCollectionsTranslator.java From beam with Apache License 2.0 | 6 votes |
private static <T> MessageStream<OpMessage<T>> mergeInputStreams( List<MessageStream<OpMessage<T>>> inputStreams) { if (inputStreams.size() == 1) { return Iterables.getOnlyElement(inputStreams); } final Set<MessageStream<OpMessage<T>>> streamsToMerge = new HashSet<>(); inputStreams.forEach( stream -> { if (!streamsToMerge.add(stream)) { // Merge same streams. Make a copy of the current stream. streamsToMerge.add(stream.map(m -> m)); } }); return MessageStream.mergeAll(streamsToMerge); }
Example #11
Source File: MergeExample.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read "pageViewStream1", "pageViewStream2" and "pageViewStream3" from
 * the "tracking" system, merge them, and write the merged stream to "mergedStream".
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> pageViewSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<KV<String, PageViewEvent>> firstInput =
      tracking.getInputDescriptor("pageViewStream1", pageViewSerde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> secondInput =
      tracking.getInputDescriptor("pageViewStream2", pageViewSerde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> thirdInput =
      tracking.getInputDescriptor("pageViewStream3", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> mergedOutput =
      tracking.getOutputDescriptor("mergedStream", pageViewSerde);

  // Inputs are registered in order 1, 2, 3 and merged before the output stream is requested.
  MessageStream<KV<String, PageViewEvent>> merged = MessageStream.mergeAll(ImmutableList.of(
      appDescriptor.getInputStream(firstInput),
      appDescriptor.getInputStream(secondInput),
      appDescriptor.getInputStream(thirdInput)));

  merged.sendTo(appDescriptor.getOutputStream(mergedOutput));
}
Example #12
Source File: KeyValueStoreExample.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read "pageViewEvent" from the "tracking" system (also set as the
 * default system), repartition by member id, run each event through {@code MyStatsCounter}, key
 * the resulting stats by member id, and write them to "pageViewEventPerMember".
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsOutput =
      tracking.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  appDescriptor.withDefaultSystem(tracking);
  MessageStream<PageViewEvent> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, StatsOutput>> statsPerMember = appDescriptor.getOutputStream(statsOutput);

  pageViews
      .partitionBy(PageViewEvent::getMemberId, event -> event,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)), "partitionBy")
      // After repartitioning only the event itself is needed by the stats counter.
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(statsPerMember);
}
Example #13
Source File: TestPartitionByOperatorSpec.java From samza with Apache License 2.0 | 6 votes |
/**
 * Verifies the operator specs created by {@code partitionBy} when the supplied {@code KVSerde}
 * is a mock: the repartitioned input operator has null key/value serdes but is keyed, and the
 * original input's first registered operator is a {@code PartitionByOperatorSpec} carrying the
 * given key/value functions.
 */
@Test
public void testPartitionByWithNoSerde() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());

  // Id shared by the partition-by operator and the intermediate stream it produces.
  String expectedOpId =
      String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

  InputOperatorSpec inputOpSpec = streamAppDesc.getInputOperators().get(expectedOpId);
  assertNotNull(inputOpSpec);
  assertNull(inputOpSpec.getKeySerde());
  assertNull(inputOpSpec.getValueSerde());
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());

  InputOperatorSpec originInputSpec = streamAppDesc.getInputOperators().get(testInputDescriptor.getStreamId());
  assertTrue(originInputSpec.getRegisteredOperatorSpecs().toArray()[0] instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec =
      (PartitionByOperatorSpec) originInputSpec.getRegisteredOperatorSpecs().toArray()[0];

  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  assertEquals(expectedOpId, reparOpSpec.getOpId());
  assertEquals(keyFn, reparOpSpec.getKeyFunction());
  assertEquals(valueFn, reparOpSpec.getValueFunction());
  assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
Example #14
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 6 votes |
/**
 * Creates a simple graph of partitionBy and map:
 *
 *   input1 -> partitionBy -> map -> output1
 */
private StreamApplicationDescriptorImpl createSimpleGraph() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> source = appDesc.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> sink = appDesc.getOutputStream(output1Descriptor);

    source
        .partitionBy(pair -> pair.key, pair -> pair.value, mock(KVSerde.class), "p1")
        .map(pair -> pair)
        .sendTo(sink);
  }, config);
}
Example #15
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 6 votes |
/**
 * A special example of stream-table join where a stream is joined with a table, and the result is
 * sent to the same table. This example is necessary to ensure {@link ExecutionPlanner} does not
 * get stuck traversing the virtual cycle between stream-table-join and send-to-table operator
 * specs indefinitely.
 *
 * The reason such a virtual cycle is present is to support computing partitions of intermediate
 * streams participating in stream-table joins. Please refer to SAMZA SEP-16 for more details.
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> source = appDesc.getInputStream(input1Descriptor);

    TableDescriptor sharedTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table sharedTable = appDesc.getTable(sharedTableDescriptor);

    // The join reads from, and its result is written back to, the same table.
    source
        .join(sharedTable, mock(StreamTableJoinFunction.class))
        .sendTo(sharedTable);
  }, config);
}
Example #16
Source File: StreamApplicationDescriptorImpl.java From samza with Apache License 2.0 | 6 votes |
/**
 * Returns the message stream for the given input descriptor.
 *
 * If the descriptor's system provides a stream expander, the expander's result is returned and
 * nothing else in this method runs. Otherwise the descriptor is registered, an input operator
 * spec is created for its stream id and stored in {@code inputOperators}, and a new
 * {@code MessageStreamImpl} backed by that spec is returned.
 *
 * @param inputDescriptor descriptor of the input stream
 * @param <M> type of the messages in the returned stream
 * @return the message stream for the descriptor
 */
@Override
public <M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor) {
  SystemDescriptor systemDescriptor = inputDescriptor.getSystemDescriptor();
  Optional<StreamExpander> expander = systemDescriptor.getExpander();
  if (expander.isPresent()) {
    // The expander takes over entirely; the descriptor is not registered by this method.
    return expander.get().apply(this, inputDescriptor);
  }

  // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
  addInputDescriptor(inputDescriptor);

  String streamId = inputDescriptor.getStreamId();
  Serde serde = inputDescriptor.getSerde();
  KV<Serde, Serde> kvSerdes = getOrCreateStreamSerdes(streamId, serde);

  // The stream is treated as keyed exactly when its serde is a KVSerde.
  boolean isKeyed = serde instanceof KVSerde;
  // The transformer is optional; null is passed on when none is configured.
  InputTransformer transformer = inputDescriptor.getTransformer().orElse(null);
  InputOperatorSpec inputOperatorSpec =
      OperatorSpecs.createInputOperatorSpec(streamId, kvSerdes.getKey(), kvSerdes.getValue(),
          transformer, isKeyed, this.getNextOpId(OpCode.INPUT, null));
  inputOperators.put(streamId, inputOperatorSpec);
  return new MessageStreamImpl(this, inputOperators.get(streamId));
}
Example #17
Source File: FilterExample.java From samza-hello-samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read keyed page views from {@code INPUT_STREAM_ID}, drop those whose
 * user id equals {@code INVALID_USER_ID}, and write the rest to {@code OUTPUT_STREAM_ID}. The
 * Kafka system is also set as the application's default system.
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Input and output share one serde: string key, JSON-encoded PageView value.
  KVSerde<String, PageView> pageViewSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> source = kafka.getInputDescriptor(INPUT_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageView>> sink = kafka.getOutputDescriptor(OUTPUT_STREAM_ID, pageViewSerde);

  appDescriptor.withDefaultSystem(kafka);
  MessageStream<KV<String, PageView>> allPageViews = appDescriptor.getInputStream(source);
  OutputStream<KV<String, PageView>> validPageViews = appDescriptor.getOutputStream(sink);

  allPageViews
      .filter(entry -> !INVALID_USER_ID.equals(entry.value.userId))
      .sendTo(validPageViews);
}
Example #18
Source File: TranslationContext.java From beam with Apache License 2.0 | 6 votes |
public <OutT> void registerInputMessageStream( PValue pvalue, InputDescriptor<org.apache.samza.operators.KV<?, OpMessage<OutT>>, ?> inputDescriptor) { // we want to register it with the Samza graph only once per i/o stream final String streamId = inputDescriptor.getStreamId(); if (registeredInputStreams.containsKey(streamId)) { MessageStream<OpMessage<OutT>> messageStream = registeredInputStreams.get(streamId); LOG.info( String.format( "Stream id %s has already been mapped to %s stream. Mapping %s to the same message stream.", streamId, messageStream, pvalue)); registerMessageStream(pvalue, messageStream); return; } @SuppressWarnings("unchecked") final MessageStream<OpMessage<OutT>> typedStream = getValueStream(appDescriptor.getInputStream(inputDescriptor)); registerMessageStream(pvalue, typedStream); registeredInputStreams.put(streamId, typedStream); }
Example #19
Source File: ProjectTranslator.java From samza with Apache License 2.0 | 6 votes |
/**
 * Translates a project node: maps the node's input stream with a {@code ProjectMapFunction},
 * applies a flatten translation when exactly one projection is a flatten, and registers the
 * resulting stream and the node itself with the context under the project's id.
 *
 * @param project the project node to translate
 * @param logicalOpId logical operator id passed to the map function
 * @param context translator context supplying the input stream and receiving the registrations
 * @throws SamzaException if more than one projection is a flatten
 */
void translate(final Project project, final String logicalOpId, final TranslatorContext context) {
  MessageStream<SamzaSqlRelMessage> inputStream = context.getMessageStream(project.getInput().getId());
  final int projectId = project.getId();
  MessageStream<SamzaSqlRelMessage> projected =
      inputStream.map(new ProjectMapFunction(projectId, queryId, logicalOpId));

  // Indexes of the projections that are flatten expressions.
  List<RexNode> projections = project.getProjects();
  List<Integer> flattenIndexes = IntStream.range(0, projections.size())
      .filter(idx -> this.isFlatten(projections.get(idx)))
      .boxed()
      .collect(Collectors.toList());

  if (flattenIndexes.size() > 1) {
    String msg = "Multiple flatten operators in a single query is not supported";
    LOG.error(msg);
    throw new SamzaException(msg);
  }
  if (!flattenIndexes.isEmpty()) {
    projected = translateFlatten(flattenIndexes.get(0), projected);
  }

  context.registerMessageStream(project.getId(), projected);
  context.registerRelNode(project.getId(), project);
}
Example #20
Source File: BroadcastAssertApp.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read page views from the topic named by the {@code INPUT_TOPIC_NAME_PROP}
 * config entry, broadcast them, and assert that every task sees all four expected page views in
 * any order.
 *
 * @param appDescriptor descriptor supplying the config and receiving the stream and operators
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  String topicName = appDescriptor.getConfig().get(INPUT_TOPIC_NAME_PROP);

  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput = kafkaSystem.getInputDescriptor(topicName, pageViewSerde);

  MessageStream<PageView> broadcastPageViews = appDescriptor
      .getInputStream(pageViewInput)
      .broadcast(pageViewSerde, "pv");

  // Each task will see all the pageview events.
  MessageStreamAssert
      .that("Each task contains all broadcast PageView events", broadcastPageViews, pageViewSerde)
      .forEachTask()
      .containsInAnyOrder(Arrays.asList(
          new PageView("v1", "p1", "u1"),
          new PageView("v2", "p2", "u1"),
          new PageView("v3", "p1", "u2"),
          new PageView("v4", "p3", "u2")));
}
Example #21
Source File: PageViewCounterExample.java From samza with Apache License 2.0 | 6 votes |
/**
 * Describes the pipeline: read "pageViewEvent" from the "tracking" system, count events per
 * member id in 10-second tumbling windows, and write (member id, page view count) to
 * "pageViewEventPerMember". The window fires early after every 5 messages and uses the
 * DISCARDING accumulation mode.
 *
 * @param appDescriptor descriptor on which the input/output streams and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countOutput =
      tracking.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, PageViewCount>> countsPerMember = appDescriptor.getOutputStream(countOutput);

  // Fold: start from zero and add one per event.
  SupplierFunction<Integer> zero = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> increment = (event, count) -> count + 1;

  pageViews
      // The last two arguments (the window's serdes) are passed as null here.
      .window(Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10),
              zero, increment, null, null)
          .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
          .setAccumulationMode(AccumulationMode.DISCARDING), "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(countsPerMember);
}
Example #22
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 5 votes |
/**
 * Example stream-table join app. Expected partition counts of intermediate streams introduced
 * by partitionBy operations are enclosed in quotes.
 *
 *   input2 (16) -> partitionBy ("32") -> send-to-table t
 *
 *                                     join-table t ------
 *                                      |                 |
 *   input1 (64) -> partitionBy ("32") _|                 |
 *                                                       join -> output1 (8)
 *                                                        |
 *                                     input3 (32) -------
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> tableJoinInput = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> tableFeed = appDesc.getInputStream(input2Descriptor);
    MessageStream<KV<Object, Object>> streamJoinInput = appDesc.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor joinTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table joinTable = appDesc.getTable(joinTableDescriptor);

    // input2 -> partitionBy -> table
    tableFeed
        .partitionBy(pair -> pair.key, pair -> pair.value, mock(KVSerde.class), "p1")
        .sendTo(joinTable);

    // input1 -> partitionBy -> join with table -> join with input3 -> output1
    tableJoinInput
        .partitionBy(pair -> pair.key, pair -> pair.value, mock(KVSerde.class), "p2")
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .join(streamJoinInput, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(joinOutput);
  }, config);
}
Example #23
Source File: TestExecutionPlanner.java From samza with Apache License 2.0 | 5 votes |
/**
 * Example stream-table join that is invalid due to disagreement in partition count
 * between the 2 input streams.
 *
 *   input1 (64) -> send-to-table t
 *
 *                  join-table t -> output1 (8)
 *                        |
 *   input2 (16) ---------
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoin() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> firstInput = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> secondInput = appDesc.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor joinTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table joinTable = appDesc.getTable(joinTableDescriptor);

    // The first input both feeds the table and is joined against it.
    firstInput.sendTo(joinTable);

    firstInput
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .join(secondInput, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(joinOutput);
  }, config);
}
Example #24
Source File: StreamApplicationIntegrationTest.java From samza with Apache License 2.0 | 5 votes |
/**
 * Describes the pipeline: read keyed page views from "PageView" on the "test" system, keep only
 * the values, repartition them by member id, and send each repartitioned message to the
 * "test"/"Output" system stream, using its key as both partition key and message key.
 *
 * @param appDescriptor descriptor on which the input stream and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor testSystem = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> pageViewInput =
      testSystem.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
  MessageStream<KV<String, TestData.PageView>> keyedPageViews = appDescriptor.getInputStream(pageViewInput);

  keyedPageViews
      .map(KV::getValue)
      .partitionBy(PageView::getMemberId, view -> view,
          KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(PageView.class)), "p1")
      .sink((entry, collector, coordinator) -> {
        SystemStream output = new SystemStream("test", "Output");
        // The whole KV entry is the payload; its key is used for both partitioning and keying.
        collector.send(new OutgoingMessageEnvelope(output, entry.getKey(), entry.getKey(), entry));
      });
}
Example #25
Source File: TestPartitionByOperatorSpec.java From samza with Apache License 2.0 | 5 votes |
/**
 * Expects an {@link IllegalArgumentException} when a {@code ScheduledMapFn} is used as the key
 * function of {@code partitionBy}.
 */
@Test(expected = IllegalArgumentException.class)
public void testScheduledFunctionAsKeyFn() {
  ScheduledMapFn scheduledKeyFn = new ScheduledMapFn();

  new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<Object> source = appDesc.getInputStream(testInputDescriptor);
    source.partitionBy(scheduledKeyFn, msg -> msg, mock(KVSerde.class), "parByKey");
  }, getConfig());
}
Example #26
Source File: TestOperatorImplGraph.java From samza with Apache License 2.0 | 5 votes |
/**
 * Verifies that two operators (a filter and a map) attached directly to the same input stream
 * are both registered on the input operator of the resulting {@code OperatorImplGraph}.
 */
@Test
public void testBroadcastChain() {
  String inputStreamId = "input";
  String inputSystem = "input-system";
  String inputPhysicalName = "input-stream";

  HashMap<String, String> jobSettings = new HashMap<>();
  jobSettings.put(JobConfig.JOB_NAME, "test-job");
  jobSettings.put(JobConfig.JOB_ID, "1");
  StreamTestUtils.addStreamConfigs(jobSettings, inputStreamId, inputSystem, inputPhysicalName);
  Config config = new MapConfig(jobSettings);
  when(this.context.getJobContext().getConfig()).thenReturn(config);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(inputStreamId, mock(Serde.class));
    MessageStream<Object> source = appDesc.getInputStream(inputDescriptor);
    // Both operators hang off the same input stream rather than being chained.
    source.filter(mock(FilterFunction.class));
    source.map(mock(MapFunction.class));
  }, config);

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
  InputOperatorImpl inputOpImpl =
      opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));

  assertEquals(2, inputOpImpl.registeredOperators.size());
  assertTrue(inputOpImpl.registeredOperators.stream()
      .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.FILTER));
  assertTrue(inputOpImpl.registeredOperators.stream()
      .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.MAP));
}
Example #27
Source File: AzureBlobApplication.java From samza-hello-samza with Apache License 2.0 | 5 votes |
@Override public void describe(StreamApplicationDescriptor appDescriptor) { // Define a system descriptor for Kafka KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT) .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS) .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS); KafkaInputDescriptor<PageView> pageViewInputDescriptor = kafkaSystemDescriptor.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class)); // Define a system descriptor for Azure Blob Storage GenericSystemDescriptor azureBlobSystemDescriptor = new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory"); GenericOutputDescriptor<PageViewAvroRecord> azureBlobOuputDescriptor = azureBlobSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>()); // Set Kafka as the default system for the job appDescriptor.withDefaultSystem(kafkaSystemDescriptor); // Define the input and output streams with descriptors MessageStream<PageView> pageViewInput = appDescriptor.getInputStream(pageViewInputDescriptor); OutputStream<PageViewAvroRecord> pageViewAvroRecordOutputStream = appDescriptor.getOutputStream(azureBlobOuputDescriptor); // Define the execution flow with the high-level API pageViewInput .map((message) -> { LOG.info("Sending: Received PageViewEvent with pageId: " + message.pageId); return PageViewAvroRecord.buildPageViewRecord(message); }) .sendTo(pageViewAvroRecordOutputStream); }
Example #28
Source File: TestPartitionByOperatorSpec.java From samza with Apache License 2.0 | 5 votes |
/**
 * Verifies the operator specs created by {@code partitionBy} with a {@code NoOpSerde} based
 * {@code KVSerde}: a second, keyed input operator exists for the repartitioned stream with
 * NoOp key/value serdes, and the original input's first registered operator is a
 * {@code PartitionByOperatorSpec} carrying the given key/value functions.
 */
@Test
public void testPartitionBy() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  KVSerde<Object, Object> partitionBySerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, partitionBySerde, testRepartitionedStreamName);
  }, getConfig());

  // Id shared by the partition-by operator and the intermediate stream it produces.
  String expectedOpId =
      String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

  assertEquals(2, streamAppDesc.getInputOperators().size());
  Map<String, InputOperatorSpec> inputOpSpecs = streamAppDesc.getInputOperators();
  assertTrue(inputOpSpecs.containsKey(expectedOpId));

  InputOperatorSpec inputOpSpec = inputOpSpecs.get(expectedOpId);
  assertEquals(expectedOpId, inputOpSpec.getStreamId());
  assertTrue(inputOpSpec.getKeySerde() instanceof NoOpSerde);
  assertTrue(inputOpSpec.getValueSerde() instanceof NoOpSerde);
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());

  InputOperatorSpec originInputSpec = inputOpSpecs.get(testInputDescriptor.getStreamId());
  assertTrue(originInputSpec.getRegisteredOperatorSpecs().toArray()[0] instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec =
      (PartitionByOperatorSpec) originInputSpec.getRegisteredOperatorSpecs().toArray()[0];

  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  assertEquals(expectedOpId, reparOpSpec.getOpId());
  assertEquals(keyFn, reparOpSpec.getKeyFunction());
  assertEquals(valueFn, reparOpSpec.getValueFunction());
  assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
Example #29
Source File: TestPartitionByOperatorSpec.java From samza with Apache License 2.0 | 5 votes |
/**
 * Builds a graph containing a single {@code partitionBy}, clones its operator spec graph, and
 * checks the clone against the original with {@code OperatorSpecTestUtils.assertClonedGraph}.
 */
@Test
public void testCopy() {
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream source = appDesc.getInputStream(testInputDescriptor);
    source.partitionBy(msg -> msg.toString(), msg -> msg, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());

  OperatorSpecGraph original = streamAppDesc.getOperatorSpecGraph();
  OperatorSpecTestUtils.assertClonedGraph(original, original.clone());
}
Example #30
Source File: StreamTableJoinExample.java From samza-hello-samza with Apache License 2.0 | 5 votes |
/**
 * Describes a stream-table join: profiles are keyed by user id and written to the
 * "profile-table" RocksDB table, while page views are repartitioned by user id, joined against
 * that table with {@code JoinFn}, and written to {@code OUTPUT_TOPIC}. The Kafka system is set
 * as the application's default system.
 *
 * @param appDescriptor descriptor on which the streams, table and operators are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> enrichedSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileSource = kafka.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewSource = kafka.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> enrichedSink = kafka.getOutputDescriptor(OUTPUT_TOPIC, enrichedSerde);

  RocksDbTableDescriptor<String, Profile> profileTableSpec =
      new RocksDbTableDescriptor<String, Profile>("profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<Profile> profiles = appDescriptor.getInputStream(profileSource);
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewSource);
  OutputStream<EnrichedPageView> enrichedPageViews = appDescriptor.getOutputStream(enrichedSink);
  Table<KV<String, Profile>> profilesByUser = appDescriptor.getTable(profileTableSpec);

  // Populate the table: user id -> profile.
  profiles
      .map(profile -> KV.of(profile.userId, profile))
      .sendTo(profilesByUser);

  // Repartition page views by the same key before joining them against the table.
  pageViews
      .partitionBy(view -> view.userId, view -> view, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profilesByUser, new JoinFn())
      .sendTo(enrichedPageViews);
}