org.apache.samza.task.StreamTaskFactory Java Examples
The following examples show how to use
org.apache.samza.task.StreamTaskFactory.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TransactionalStateIntegrationTest.java From samza with Apache License 2.0 | 6 votes |
/**
 * Registers one Kafka input stream, one RocksDB table descriptor with a changelog stream,
 * and a factory producing {@code MyTask} instances on the given application descriptor.
 *
 * @param appDescriptor the task application descriptor to populate
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor inputSystem = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());
  KafkaInputDescriptor<KV<String, String>> inputStream =
      inputSystem.getInputDescriptor(INPUT_TOPIC, stringKvSerde);

  // The table is backed by the changelog topic, with a single replica.
  RocksDbTableDescriptor<String, String> storeTable =
      new RocksDbTableDescriptor<>(STORE_NAME, stringKvSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  StreamTaskFactory taskFactory = () -> new MyTask();

  appDescriptor
      .withInputStream(inputStream)
      .withTaskFactory(taskFactory)
      .withTable(storeTable);
}
Example #2
Source File: TransactionalStateMultiStoreIntegrationTest.java From samza with Apache License 2.0 | 6 votes |
/**
 * Registers one Kafka input stream, two RocksDB table descriptors (each with its own
 * changelog stream), and a factory producing {@code MyTask} instances.
 *
 * @param appDescriptor the task application descriptor to populate
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor inputSystem = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());
  KafkaInputDescriptor<KV<String, String>> inputStream =
      inputSystem.getInputDescriptor(INPUT_TOPIC, stringKvSerde);

  // First store: changelog topic comes from the changelogTopic value.
  RocksDbTableDescriptor<String, String> firstStore =
      new RocksDbTableDescriptor<>(STORE_1_NAME, stringKvSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  // Second store: changelog topic is the STORE_2_CHANGELOG constant.
  RocksDbTableDescriptor<String, String> secondStore =
      new RocksDbTableDescriptor<>(STORE_2_NAME, stringKvSerde)
          .withChangelogStream(STORE_2_CHANGELOG)
          .withChangelogReplicationFactor(1);

  StreamTaskFactory taskFactory = () -> new MyTask();

  appDescriptor
      .withInputStream(inputStream)
      .withTaskFactory(taskFactory)
      .withTable(firstStore)
      .withTable(secondStore);
}
Example #3
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 5 votes |
/**
 * Expects a {@link SamzaException} when the processor is given neither a task factory
 * nor a {@code task.class} configuration entry.
 */
@Test(expected = SamzaException.class)
public void testStreamProcessorWithNoTask() {
  final String systemName = "test-system";
  final String sourceTopic = "numbers4";
  final String sinkTopic = "output4";
  final int numMessages = 20;

  final Map<String, String> rawConfig =
      createConfigs(PROCESSOR_ID, systemName, sourceTopic, sinkTopic, numMessages);
  // Remove the task class so that no task can be resolved from configuration either.
  rawConfig.remove("task.class");
  final Config configs = new MapConfig(rawConfig);

  // A typed null selects the StreamTaskFactory constructor overload.
  final StreamTaskFactory absentFactory = null;
  final TestStubs stubs = new TestStubs(configs, absentFactory, bootstrapServers());

  run(stubs.processor, stubs.shutdownLatch);
}
Example #4
Source File: WikipediaStatsTaskApplication.java From samza-hello-samza with Apache License 2.0 | 5 votes |
@Override public void describe(TaskApplicationDescriptor taskApplicationDescriptor) { // Define a system descriptor for Kafka KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka") .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT) .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS) .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS); // Input descriptor for the wikipedia-edits topic KafkaInputDescriptor kafkaInputDescriptor = kafkaSystemDescriptor.getInputDescriptor("wikipedia-edits", new JsonSerde<>()); // Set the default system descriptor to Kafka, so that it is used for all // internal resources, e.g., kafka topic for checkpointing, coordinator stream. taskApplicationDescriptor.withDefaultSystem(kafkaSystemDescriptor); // Set the input taskApplicationDescriptor.withInputStream(kafkaInputDescriptor); // Set the output taskApplicationDescriptor.withOutputStream( kafkaSystemDescriptor.getOutputDescriptor("wikipedia-stats", new JsonSerde<>())); // Set the task factory taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) () -> new WikipediaStatsStreamTask()); }
Example #5
Source File: WikipediaParserTaskApplication.java From samza-hello-samza with Apache License 2.0 | 5 votes |
@Override public void describe(TaskApplicationDescriptor taskApplicationDescriptor) { // Define a system descriptor for Kafka, which is both our input and output system KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT) .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS) .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS); // Input descriptor for the wikipedia-raw topic KafkaInputDescriptor kafkaInputDescriptor = kafkaSystemDescriptor.getInputDescriptor("wikipedia-raw", new JsonSerde<>()); // Output descriptor for the wikipedia-edits topic KafkaOutputDescriptor kafkaOutputDescriptor = kafkaSystemDescriptor.getOutputDescriptor("wikipedia-edits", new JsonSerde<>()); // Set the default system descriptor to Kafka, so that it is used for all // internal resources, e.g., kafka topic for checkpointing, coordinator stream. taskApplicationDescriptor.withDefaultSystem(kafkaSystemDescriptor); // Set the input taskApplicationDescriptor.withInputStream(kafkaInputDescriptor); // Set the output taskApplicationDescriptor.withOutputStream(kafkaOutputDescriptor); // Set the task factory taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) () -> new WikipediaParserStreamTask()); }
Example #6
Source File: WikipediaFeedTaskApplication.java From samza-hello-samza with Apache License 2.0 | 5 votes |
@Override public void describe(TaskApplicationDescriptor taskApplicationDescriptor) { // Define a SystemDescriptor for Wikipedia data WikipediaSystemDescriptor wikipediaSystemDescriptor = new WikipediaSystemDescriptor("irc.wikimedia.org", 6667); // Define InputDescriptors for consuming wikipedia data WikipediaInputDescriptor wikipediaInputDescriptor = wikipediaSystemDescriptor.getInputDescriptor("en-wikipedia").withChannel("#en.wikipedia"); WikipediaInputDescriptor wiktionaryInputDescriptor = wikipediaSystemDescriptor.getInputDescriptor("en-wiktionary").withChannel("#en.wiktionary"); WikipediaInputDescriptor wikiNewsInputDescriptor = wikipediaSystemDescriptor.getInputDescriptor("en-wikinews").withChannel("#en.wikinews"); // Define a system descriptor for Kafka, which is our output system KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT) .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS) .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS); // Define an output descriptor KafkaOutputDescriptor kafkaOutputDescriptor = kafkaSystemDescriptor.getOutputDescriptor("wikipedia-raw", new JsonSerde<>()); // Set the default system descriptor to Kafka, so that it is used for all // internal resources, e.g., kafka topic for checkpointing, coordinator stream. taskApplicationDescriptor.withDefaultSystem(kafkaSystemDescriptor); // Set the inputs taskApplicationDescriptor.withInputStream(wikipediaInputDescriptor); taskApplicationDescriptor.withInputStream(wiktionaryInputDescriptor); taskApplicationDescriptor.withInputStream(wikiNewsInputDescriptor); // Set the output taskApplicationDescriptor.withOutputStream(kafkaOutputDescriptor); // Set the task factory taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) () -> new WikipediaFeedStreamTask()); }
Example #7
Source File: FaultInjectionTest.java From samza with Apache License 2.0 | 5 votes |
/**
 * Registers a Kafka input stream of {@code PageView} messages, whose topic name is read
 * from the application config, together with a factory producing
 * {@code FaultInjectionTask} instances.
 *
 * @param appDescriptor the task application descriptor to populate
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // The topic name is supplied through configuration rather than hard-coded.
  String topicName = appDescriptor.getConfig().get(INPUT_TOPIC_NAME_PROP);

  final JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput = kafkaSystem.getInputDescriptor(topicName, pageViewSerde);

  // containerShutdownLatch is read each time the factory is invoked, not here.
  StreamTaskFactory taskFactory = () -> new FaultInjectionTask(containerShutdownLatch);

  appDescriptor
      .withInputStream(pageViewInput)
      .withTaskFactory(taskFactory);
}
Example #8
Source File: StreamTaskIntegrationTest.java From samza with Apache License 2.0 | 5 votes |
/**
 * Registers the {@code Profile} and {@code PageView} input streams, the
 * {@code EnrichedPageView} output stream, an in-memory table named
 * {@code profile-view-store}, and a factory producing {@code StatefulStreamTask} instances.
 *
 * @param appDescriptor the task application descriptor to populate
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor testSystem = new KafkaSystemDescriptor("test");

  KafkaInputDescriptor<Profile> profileInput =
      testSystem.getInputDescriptor("Profile", new JsonSerdeV2<>());
  KafkaInputDescriptor<PageView> pageViewInput =
      testSystem.getInputDescriptor("PageView", new JsonSerdeV2<>());
  KafkaOutputDescriptor<EnrichedPageView> enrichedOutput =
      testSystem.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());

  StreamTaskFactory taskFactory = StatefulStreamTask::new;

  appDescriptor
      .withInputStream(profileInput)
      .withInputStream(pageViewInput)
      .withOutputStream(enrichedOutput)
      .withTable(
          new InMemoryTableDescriptor(
              "profile-view-store",
              KVSerde.of(new IntegerSerde(), new TestTableData.ProfileJsonSerde())))
      .withTaskFactory(taskFactory);
}
Example #9
Source File: TestLocalTableWithConfigRewriterEndToEnd.java From samza with Apache License 2.0 | 5 votes |
/**
 * Registers the {@code PageView} input stream and a factory producing
 * {@code MyStreamTask} instances.
 *
 * @param appDescriptor the task application descriptor to populate
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
  GenericInputDescriptor<TestTableData.PageView> pageViewInput =
      testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

  StreamTaskFactory taskFactory = MyStreamTask::new;

  appDescriptor
      .withInputStream(pageViewInput)
      .withTaskFactory(taskFactory);
}
Example #10
Source File: TestLocalTableWithLowLevelApiEndToEnd.java From samza with Apache License 2.0 | 5 votes |
/**
 * Registers the {@code PageView} input stream, an in-memory table named {@code t1},
 * and a factory producing {@code MyStreamTask} instances.
 *
 * @param appDescriptor the task application descriptor to populate
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
  GenericInputDescriptor<TestTableData.PageView> pageViewInput =
      testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

  StreamTaskFactory taskFactory = MyStreamTask::new;

  appDescriptor
      .withInputStream(pageViewInput)
      .withTable(
          new InMemoryTableDescriptor(
              "t1",
              KVSerde.of(new IntegerSerde(), new TestTableData.PageViewJsonSerde())))
      .withTaskFactory(taskFactory);
}
Example #11
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 5 votes |
/**
 * Convenience constructor that delegates to the seven-argument constructor with a
 * run-loop shutdown duration of {@link Duration#ZERO}.
 *
 * @param config processor configuration
 * @param customMetricsReporters metrics reporters keyed by name
 * @param streamTaskFactory factory handed to the processor
 * @param processorListener lifecycle listener handed to the processor
 * @param jobCoordinator job coordinator handed to the processor
 * @param container container handed to the delegated constructor
 */
public TestableStreamProcessor(
    Config config,
    Map<String, MetricsReporter> customMetricsReporters,
    StreamTaskFactory streamTaskFactory,
    ProcessorLifecycleListener processorListener,
    JobCoordinator jobCoordinator,
    SamzaContainer container) {
  this(
      config,
      customMetricsReporters,
      streamTaskFactory,
      processorListener,
      jobCoordinator,
      container,
      Duration.ZERO);
}
Example #12
Source File: TaskApplicationExample.java From samza with Apache License 2.0 | 5 votes |
@Override public void describe(TaskApplicationDescriptor appDescriptor) { // add input and output streams KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("tracking"); KafkaInputDescriptor<String> isd = ksd.getInputDescriptor("myinput", new StringSerde()); KafkaOutputDescriptor<String> osd = ksd.getOutputDescriptor("myout", new StringSerde()); TableDescriptor td = new RocksDbTableDescriptor("mytable", new KVSerde(new NoOpSerde(), new NoOpSerde())); appDescriptor .withInputStream(isd) .withOutputStream(osd) .withTable(td) .withTaskFactory((StreamTaskFactory) () -> new MyStreamTask()); }
Example #13
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 5 votes |
/**
 * Creates a processor with the fixed id {@code TEST_PROCESSOR_ID}, three empty optionals,
 * a listener factory that always returns {@code processorListener}, and a mocked
 * {@code MetadataStore}; then records the supplied container and shutdown duration.
 *
 * @param config processor configuration
 * @param customMetricsReporters metrics reporters keyed by name
 * @param streamTaskFactory factory handed to the superclass
 * @param processorListener lifecycle listener returned by the listener factory
 * @param jobCoordinator job coordinator handed to the superclass
 * @param container container stored on this instance
 * @param runLoopShutdownDuration duration stored on this instance
 */
public TestableStreamProcessor(
    Config config,
    Map<String, MetricsReporter> customMetricsReporters,
    StreamTaskFactory streamTaskFactory,
    ProcessorLifecycleListener processorListener,
    JobCoordinator jobCoordinator,
    SamzaContainer container,
    Duration runLoopShutdownDuration) {
  super(
      "TEST_PROCESSOR_ID",
      config,
      customMetricsReporters,
      streamTaskFactory,
      Optional.empty(),
      Optional.empty(),
      Optional.empty(),
      sp -> processorListener,
      jobCoordinator,
      Mockito.mock(MetadataStore.class));
  this.runLoopShutdownDuration = runLoopShutdownDuration;
  this.container = container;
}
Example #14
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 4 votes |
/**
 * Runs the single-argument constructor with {@code bootstrapServer}, then creates the
 * processor under test with id {@code "1"}, an empty reporter map, the given task factory
 * and this stub's listener.
 *
 * @param config processor configuration
 * @param taskFactory task factory passed to the processor (callers may pass {@code null})
 * @param bootstrapServer value forwarded to the single-argument constructor
 */
TestStubs(Config config, StreamTaskFactory taskFactory, String bootstrapServer) {
  this(bootstrapServer);
  this.processor =
      new StreamProcessor(
          "1",
          config,
          new HashMap<>(),
          taskFactory,
          listener);
}
Example #15
Source File: TestZkStreamProcessorBase.java From samza with Apache License 2.0 | 4 votes |
protected StreamProcessor createStreamProcessor(final String pId, Map<String, String> map, final CountDownLatch waitStart, final CountDownLatch waitStop) { map.put(ApplicationConfig.PROCESSOR_ID, pId); Config config = new MapConfig(map); String jobCoordinatorFactoryClassName = new JobCoordinatorConfig(config).getJobCoordinatorFactoryClassName(); JobCoordinator jobCoordinator = ReflectionUtil.getObj(jobCoordinatorFactoryClassName, JobCoordinatorFactory.class) .getJobCoordinator(pId, config, new MetricsRegistryMap(), Mockito.mock(CoordinatorStreamStore.class)); ProcessorLifecycleListener listener = new ProcessorLifecycleListener() { @Override public void beforeStart() { } @Override public void afterStart() { if (waitStart != null) { waitStart.countDown(); } LOG.info("onStart is called for pid=" + pId); } @Override public void afterStop() { // stopped w/o failure if (waitStop != null) { waitStop.countDown(); } LOG.info("afterStop is called for pid=" + pId + " with successful shutdown"); } @Override public void afterFailure(Throwable t) { // stopped w/ failure LOG.info("afterStop is called for pid=" + pId + " with failure"); } }; StreamProcessor processor = new StreamProcessor(pId, config, new HashMap<>(), (StreamTaskFactory) TestStreamTask::new, listener, jobCoordinator); return processor; }
Example #16
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 4 votes |
/** * Tests that a failure in container correctly stops a running JobCoordinator and propagates the exception * through the StreamProcessor * * Assertions: * - JobCoordinator has been stopped from the JobCoordinatorListener callback * - ProcessorLifecycleListener#afterStop(Throwable) has been invoked w/ non-null Throwable */ @Test public void testContainerFailureCorrectlyStopsProcessor() throws InterruptedException { JobCoordinator mockJobCoordinator = Mockito.mock(JobCoordinator.class); Throwable expectedThrowable = new SamzaException("Failure in Container!"); AtomicReference<Throwable> actualThrowable = new AtomicReference<>(); final CountDownLatch runLoopStartedLatch = new CountDownLatch(1); RunLoop failingRunLoop = mock(RunLoop.class); doAnswer(invocation -> { try { runLoopStartedLatch.countDown(); throw expectedThrowable; } catch (InterruptedException ie) { ie.printStackTrace(); } return null; }).when(failingRunLoop).run(); SamzaContainer mockContainer = StreamProcessorTestUtils.getDummyContainer(failingRunLoop, mock(StreamTask.class)); final CountDownLatch processorListenerFailed = new CountDownLatch(1); TestableStreamProcessor processor = new TestableStreamProcessor( new MapConfig(), new HashMap<>(), mock(StreamTaskFactory.class), new ProcessorLifecycleListener() { @Override public void beforeStart() { processorListenerState.put(ListenerCallback.BEFORE_START, true); } @Override public void afterStart() { processorListenerState.put(ListenerCallback.AFTER_START, true); } @Override public void afterStop() { processorListenerState.put(ListenerCallback.AFTER_STOP, true); } @Override public void afterFailure(Throwable t) { processorListenerState.put(ListenerCallback.AFTER_FAILURE, true); actualThrowable.getAndSet(t); processorListenerFailed.countDown(); } }, mockJobCoordinator, mockContainer); final CountDownLatch coordinatorStop = new CountDownLatch(1); doAnswer(invocation -> { coordinatorStop.countDown(); return null; }).when(mockJobCoordinator).stop(); 
doAnswer(invocation -> { new Thread(() -> { try { processor.jobCoordinatorListener.onJobModelExpired(); processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel()); coordinatorStop.await(); processor.jobCoordinatorListener.onCoordinatorStop(); } catch (InterruptedException e) { e.printStackTrace(); } }).start(); return null; }).when(mockJobCoordinator).start(); processor.start(); // This block is required for the mockRunloop is actually started. // Otherwise, processor.stop gets triggered before mockRunloop begins to block runLoopStartedLatch.await(); assertTrue( "Container failed and processor listener failed was not invoked within timeout!", processorListenerFailed.await(30, TimeUnit.SECONDS)); assertEquals(expectedThrowable, actualThrowable.get()); assertTrue(processorListenerState.get(ListenerCallback.BEFORE_START)); assertTrue(processorListenerState.get(ListenerCallback.AFTER_START)); Assert.assertFalse(processorListenerState.get(ListenerCallback.AFTER_STOP)); assertTrue(processorListenerState.get(ListenerCallback.AFTER_FAILURE)); }
Example #17
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 4 votes |
/** * Given that the job model expires, but the container takes too long to stop, a TimeoutException should be propagated * to the processor lifecycle listener. */ @Test public void testJobModelExpiredContainerShutdownTimeout() throws InterruptedException { JobCoordinator mockJobCoordinator = mock(JobCoordinator.class); // use this to store the exception passed to afterFailure for the processor lifecycle listener AtomicReference<Throwable> afterFailureException = new AtomicReference<>(null); TestableStreamProcessor processor = new TestableStreamProcessor( // set a small shutdown timeout so it triggers faster new MapConfig(ImmutableMap.of(TaskConfig.TASK_SHUTDOWN_MS, "1")), new HashMap<>(), mock(StreamTaskFactory.class), new ProcessorLifecycleListener() { @Override public void beforeStart() { } @Override public void afterStart() { } @Override public void afterFailure(Throwable t) { afterFailureException.set(t); } @Override public void afterStop() { } }, mockJobCoordinator, null, // take an extra second to shut down so that task shutdown timeout gets reached Duration.of(1, ChronoUnit.SECONDS)); Thread jcThread = new Thread(() -> { // gets processor into rebalance mode so onNewJobModel creates a new container processor.jobCoordinatorListener.onJobModelExpired(); processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel()); try { // wait for the run loop to be ready before triggering rebalance processor.runLoopStartForMain.await(); } catch (InterruptedException e) { e.printStackTrace(); } processor.jobCoordinatorListener.onJobModelExpired(); }); doAnswer(invocation -> { jcThread.start(); return null; }).when(mockJobCoordinator).start(); // ensure that the coordinator stop occurred before checking the exception being thrown CountDownLatch coordinatorStop = new CountDownLatch(1); doAnswer(invocation -> { processor.jobCoordinatorListener.onCoordinatorStop(); coordinatorStop.countDown(); return null; }).when(mockJobCoordinator).stop(); processor.start(); // make 
sure the job model expired callback completed assertTrue("Job coordinator stop not called", coordinatorStop.await(10, TimeUnit.SECONDS)); assertNotNull(afterFailureException.get()); assertTrue(afterFailureException.get() instanceof TimeoutException); }
Example #18
Source File: TestStreamProcessor.java From samza with Apache License 2.0 | 4 votes |
/** * Tests stop() method when Container AND JobCoordinator are running */ @Test public void testStopByProcessor() throws InterruptedException { JobCoordinator mockJobCoordinator = mock(JobCoordinator.class); final CountDownLatch processorListenerStop = new CountDownLatch(1); final CountDownLatch processorListenerStart = new CountDownLatch(1); TestableStreamProcessor processor = new TestableStreamProcessor( new MapConfig(), new HashMap<>(), mock(StreamTaskFactory.class), new ProcessorLifecycleListener() { @Override public void afterStart() { processorListenerState.put(ListenerCallback.AFTER_START, true); processorListenerStart.countDown(); } @Override public void afterFailure(Throwable t) { processorListenerState.put(ListenerCallback.AFTER_FAILURE, true); } @Override public void afterStop() { processorListenerState.put(ListenerCallback.AFTER_STOP, true); processorListenerStop.countDown(); } @Override public void beforeStart() { processorListenerState.put(ListenerCallback.BEFORE_START, true); } }, mockJobCoordinator, null); final CountDownLatch coordinatorStop = new CountDownLatch(1); final Thread jcThread = new Thread(() -> { try { processor.jobCoordinatorListener.onJobModelExpired(); processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel()); coordinatorStop.await(); processor.jobCoordinatorListener.onCoordinatorStop(); } catch (InterruptedException e) { e.printStackTrace(); } }); doAnswer(invocation -> { coordinatorStop.countDown(); return null; }).when(mockJobCoordinator).stop(); doAnswer(invocation -> { jcThread.start(); return null; }).when(mockJobCoordinator).start(); processor.start(); processorListenerStart.await(10, TimeUnit.SECONDS); assertEquals(SamzaContainerStatus.STARTED, processor.getContainerStatus()); // This block is required for the mockRunloop is actually start. 
// Otherwise, processor.stop gets triggered before mockRunloop begins to block processor.runLoopStartForMain.await(); processor.stop(); processorListenerStop.await(); // Assertions on which callbacks are expected to be invoked assertTrue(processorListenerState.get(ListenerCallback.BEFORE_START)); assertTrue(processorListenerState.get(ListenerCallback.AFTER_START)); assertTrue(processorListenerState.get(ListenerCallback.AFTER_STOP)); Assert.assertFalse(processorListenerState.get(ListenerCallback.AFTER_FAILURE)); }