org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper Java Examples
The following examples show how to use
org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: KafkaExample.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer<>( parameterTool.getRequired("output-topic"), new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()), parameterTool.getProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE)); env.execute("Modern Kafka Example"); }
Example #2
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled, * the snapshot method does indeed finishes without waiting for pending records; * we set a timeout because the test will not finish if the logic is broken. */ @SuppressWarnings("unchecked") @Test(timeout = 5000) public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); producer.setFlushOnCheckpoint(false); final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer(); final OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg")); // make sure that all callbacks have not been completed verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class)); // should return even if there are pending records testHarness.snapshot(123L, 123L); testHarness.close(); }
Example #3
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 6 votes |
/** * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled, * the snapshot method does indeed finishes without waiting for pending records; * we set a timeout because the test will not finish if the logic is broken. */ @SuppressWarnings("unchecked") @Test(timeout = 5000) public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); producer.setFlushOnCheckpoint(false); final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer(); final OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg")); // make sure that all callbacks have not been completed verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class)); // should return even if there are pending records testHarness.snapshot(123L, 123L); testHarness.close(); }
Example #4
Source File: KafkaExample.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer<>( parameterTool.getRequired("output-topic"), new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()), parameterTool.getProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE)); env.execute("Modern Kafka Example"); }
Example #5
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown. */ @Test public void testAsyncErrorRethrownOnInvoke() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg-1")); // let the message request return an async exception producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception")); try { testHarness.processElement(new StreamRecord<>("msg-2")); } catch (Exception e) { // the next invoke should rethrow the async exception Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception")); // test succeeded return; } Assert.fail(); }
Example #6
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the constructor eagerly checks bootstrap servers are set in config. */ @Test(expected = IllegalArgumentException.class) public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception { // no bootstrap servers set in props Properties props = new Properties(); // should throw IllegalArgumentException new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); }
Example #7
Source File: KafkaTableSink.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds the {@code FlinkKafkaProducer} for this sink, wrapping the given value-only
 * serialization schema in a {@code KeyedSerializationSchemaWrapper}.
 *
 * @param topic topic passed to the producer
 * @param properties properties passed to the producer
 * @param serializationSchema schema used to serialize each {@code Row}
 * @param partitioner optional partitioner passed to the producer
 * @return the newly created producer
 */
@Override
protected SinkFunction<Row> createKafkaProducer(
        String topic,
        Properties properties,
        SerializationSchema<Row> serializationSchema,
        Optional<FlinkKafkaPartitioner<Row>> partitioner) {
    final KeyedSerializationSchemaWrapper<Row> keyedSchema =
        new KeyedSerializationSchemaWrapper<>(serializationSchema);
    return new FlinkKafkaProducer<>(topic, keyedSchema, properties, partitioner);
}
Example #8
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that constructor defaults to key value serializers in config to byte array deserializers if not set. */ @Test public void testKeyValueDeserializersSetIfMissing() throws Exception { Properties props = new Properties(); props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345"); // should set missing key value deserializers new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG)); assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG)); assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName())); assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName())); }
Example #9
Source File: Kafka011TableSink.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds the {@code FlinkKafkaProducer011} for this sink, wrapping the given value-only
 * serialization schema in a {@code KeyedSerializationSchemaWrapper}.
 *
 * @param topic topic passed to the producer
 * @param properties properties passed to the producer
 * @param serializationSchema schema used to serialize each {@code Row}
 * @param partitioner optional partitioner passed to the producer
 * @return the newly created producer
 */
@Override
protected SinkFunction<Row> createKafkaProducer(
        String topic,
        Properties properties,
        SerializationSchema<Row> serializationSchema,
        Optional<FlinkKafkaPartitioner<Row>> partitioner) {
    final KeyedSerializationSchemaWrapper<Row> keyedSchema =
        new KeyedSerializationSchemaWrapper<>(serializationSchema);
    return new FlinkKafkaProducer011<>(topic, keyedSchema, properties, partitioner);
}
Example #10
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that partitions list is determinate and correctly provided to custom partitioner. */ @SuppressWarnings("unchecked") @Test public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception { FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class); RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class); when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0); when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1); // out-of-order list of 4 partitions List<PartitionInfo> mockPartitionsList = new ArrayList<>(4); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null)); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null)); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null)); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null)); final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner); producer.setRuntimeContext(mockRuntimeContext); final KafkaProducer mockProducer = producer.getMockKafkaProducer(); when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList); when(mockProducer.metrics()).thenReturn(null); producer.open(new Configuration()); verify(mockPartitioner, times(1)).open(0, 1); producer.invoke("foobar", SinkContextUtil.forTimestamp(0)); verify(mockPartitioner, times(1)).partition( "foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3}); }
Example #11
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that partitions list is determinate and correctly provided to custom partitioner. */ @SuppressWarnings("unchecked") @Test public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception { FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class); RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class); when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0); when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1); // out-of-order list of 4 partitions List<PartitionInfo> mockPartitionsList = new ArrayList<>(4); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null)); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null)); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null)); mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null)); final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner); producer.setRuntimeContext(mockRuntimeContext); final KafkaProducer mockProducer = producer.getMockKafkaProducer(); when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList); when(mockProducer.metrics()).thenReturn(null); producer.open(new Configuration()); verify(mockPartitioner, times(1)).open(0, 1); producer.invoke("foobar", SinkContextUtil.forTimestamp(0)); verify(mockPartitioner, times(1)).partition( "foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3}); }
Example #12
Source File: KafkaTableSink.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds the {@code FlinkKafkaProducer} for this sink, wrapping the given value-only
 * serialization schema in a {@code KeyedSerializationSchemaWrapper}.
 *
 * @param topic topic passed to the producer
 * @param properties properties passed to the producer
 * @param serializationSchema schema used to serialize each {@code Row}
 * @param partitioner optional partitioner passed to the producer
 * @return the newly created producer
 */
@Override
protected SinkFunction<Row> createKafkaProducer(
        String topic,
        Properties properties,
        SerializationSchema<Row> serializationSchema,
        Optional<FlinkKafkaPartitioner<Row>> partitioner) {
    final KeyedSerializationSchemaWrapper<Row> keyedSchema =
        new KeyedSerializationSchemaWrapper<>(serializationSchema);
    return new FlinkKafkaProducer<>(topic, keyedSchema, properties, partitioner);
}
Example #13
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown. */ @Test public void testAsyncErrorRethrownOnCheckpoint() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg-1")); // let the message request return an async exception producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception")); try { testHarness.snapshot(123L, 123L); } catch (Exception e) { // the next invoke should rethrow the async exception Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception")); // test succeeded return; } Assert.fail(); }
Example #14
Source File: Kafka011TableSink.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds the {@code FlinkKafkaProducer011} for this sink, wrapping the given value-only
 * serialization schema in a {@code KeyedSerializationSchemaWrapper}.
 *
 * @param topic topic passed to the producer
 * @param properties properties passed to the producer
 * @param serializationSchema schema used to serialize each {@code Row}
 * @param partitioner optional partitioner passed to the producer
 * @return the newly created producer
 */
@Override
protected SinkFunction<Row> createKafkaProducer(
        String topic,
        Properties properties,
        SerializationSchema<Row> serializationSchema,
        Optional<FlinkKafkaPartitioner<Row>> partitioner) {
    final KeyedSerializationSchemaWrapper<Row> keyedSchema =
        new KeyedSerializationSchemaWrapper<>(serializationSchema);
    return new FlinkKafkaProducer011<>(topic, keyedSchema, properties, partitioner);
}
Example #15
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown. */ @Test public void testAsyncErrorRethrownOnCheckpoint() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg-1")); // let the message request return an async exception producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception")); try { testHarness.snapshot(123L, 123L); } catch (Exception e) { // the next invoke should rethrow the async exception Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception")); // test succeeded return; } Assert.fail(); }
Example #16
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown. */ @Test public void testAsyncErrorRethrownOnInvoke() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg-1")); // let the message request return an async exception producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception")); try { testHarness.processElement(new StreamRecord<>("msg-2")); } catch (Exception e) { // the next invoke should rethrow the async exception Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception")); // test succeeded return; } Assert.fail(); }
Example #17
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that the constructor eagerly checks bootstrap servers are set in config. */ @Test(expected = IllegalArgumentException.class) public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception { // no bootstrap servers set in props Properties props = new Properties(); // should throw IllegalArgumentException new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); }
Example #18
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that constructor defaults to key value serializers in config to byte array deserializers if not set. */ @Test public void testKeyValueDeserializersSetIfMissing() throws Exception { Properties props = new Properties(); props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345"); // should set missing key value deserializers new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG)); assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG)); assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName())); assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName())); }
Example #19
Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception { final String topic = "auto-offset-reset-test"; final int parallelism = 1; final int elementsPerPartition = 50000; Properties tprops = new Properties(); tprops.setProperty("retention.ms", "250"); kafkaServer.createTestTopic(topic, parallelism, 1, tprops); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately env.getConfig().disableSysoutLogging(); // ----------- add producer dataflow ---------- DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() { private boolean running = true; @Override public void run(SourceContext<String> ctx) throws InterruptedException { int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition; int limit = cnt + elementsPerPartition; while (running && !stopProducer && cnt < limit) { ctx.collect("element-" + cnt); cnt++; Thread.sleep(10); } LOG.info("Stopping producer"); } @Override public void cancel() { running = false; } }); Properties props = new Properties(); props.putAll(standardProps); props.putAll(secureProps); kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null); // ----------- add consumer dataflow ---------- NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema(); FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props); DataStreamSource<String> consuming = env.addSource(source); consuming.addSink(new DiscardingSink<String>()); tryExecute(env, "run auto offset reset test"); kafkaServer.deleteTestTopic(topic); }
Example #20
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
protected String writeSequence( String baseTopicName, final int numElements, final int parallelism, final int replicationFactor) throws Exception { LOG.info("\n===================================\n" + "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" + "==================================="); final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {}); final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema = new KeyedSerializationSchemaWrapper<>( new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig())); final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KafkaDeserializationSchemaWrapper<>( new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig())); final int maxNumAttempts = 10; for (int attempt = 1; attempt <= maxNumAttempts; attempt++) { final String topicName = baseTopicName + '-' + attempt; LOG.info("Writing attempt #" + attempt); // -------- Write the Sequence -------- createTestTopic(topicName, parallelism, replicationFactor); StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment(); writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart()); writeEnv.getConfig().disableSysoutLogging(); DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() { private boolean running = true; @Override public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception { int cnt = 0; int partition = getRuntimeContext().getIndexOfThisSubtask(); while (running && cnt < numElements) { ctx.collect(new Tuple2<>(partition, cnt)); cnt++; } } @Override public void cancel() { running = false; } }).setParallelism(parallelism); // the producer must not produce duplicates Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings); 
producerProperties.setProperty("retries", "0"); producerProperties.putAll(secureProps); kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism)) .setParallelism(parallelism); try { writeEnv.execute("Write sequence"); } catch (Exception e) { LOG.error("Write attempt failed, trying again", e); deleteTestTopic(topicName); waitUntilNoJobIsRunning(client); continue; } LOG.info("Finished writing sequence"); // -------- Validate the Sequence -------- // we need to validate the sequence, because kafka's producers are not exactly once LOG.info("Validating sequence"); waitUntilNoJobIsRunning(client); if (validateSequence(topicName, parallelism, deserSchema, numElements)) { // everything is good! return topicName; } else { deleteTestTopic(topicName); // fall through the loop } } throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts"); }
Example #21
Source File: KafkaProducerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * This test sets KafkaProducer so that it will automatically flush the data and * and fails the broker to check whether flushed records since last checkpoint were not duplicated. */ protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception { final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount; final int partition = 0; final int numElements = 1000; final int failAfterElements = 333; for (int i = 0; i < sinksCount; i++) { createTestTopic(topic + i, 1, 1); } TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig()); KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.enableCheckpointing(500); env.setParallelism(1); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0)); env.getConfig().disableSysoutLogging(); Properties properties = new Properties(); properties.putAll(standardProps); properties.putAll(secureProps); // process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application List<Integer> expectedElements = getIntegersSequence(numElements); DataStream<Integer> inputStream = env .addSource(new IntegerSource(numElements)) .map(new FailingIdentityMapper<Integer>(failAfterElements)); for (int i = 0; i < sinksCount; i++) { FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() { @Override public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) { return partition; } }; if (regularSink) { StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner); inputStream.addSink(kafkaSink.getUserFunction()); } else { kafkaServer.produceIntoKafka(inputStream, topic + i, 
keyedSerializationSchema, properties, partitioner); } } FailingIdentityMapper.failedBefore = false; TestUtils.tryExecute(env, "Exactly once test"); for (int i = 0; i < sinksCount; i++) { // assert that before failure we successfully snapshot/flushed all expected elements assertExactlyOnceForTopic( properties, topic + i, partition, expectedElements, KAFKA_READ_TIMEOUT); deleteTestTopic(topic + i); } }
Example #22
Source File: FlinkKafkaProducer.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a producer by wrapping the given {@link SerializationSchema} in a
 * {@link KeyedSerializationSchemaWrapper} and delegating to the superclass constructor.
 *
 * @param topicId topic id, passed through to the superclass constructor
 * @param serializationSchema value-only schema that gets wrapped into a keyed schema
 * @param producerConfig producer configuration, passed through to the superclass constructor
 * @param customPartitioner partitioner, passed through to the superclass constructor
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, SerializationSchema, Properties, KafkaPartitioner)}
 */
@Deprecated
public FlinkKafkaProducer(String topicId, SerializationSchema<IN> serializationSchema, Properties producerConfig, KafkaPartitioner customPartitioner) {
    super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), producerConfig, customPartitioner);
}
Example #23
Source File: FlinkKafkaProducer.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a producer by wrapping the given {@link SerializationSchema} in a
 * {@link KeyedSerializationSchemaWrapper} and delegating to the superclass constructor
 * with a {@code null} partitioner.
 *
 * @param topicId topic id, passed through to the superclass constructor
 * @param serializationSchema value-only schema that gets wrapped into a keyed schema
 * @param producerConfig producer configuration, passed through to the superclass constructor
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, SerializationSchema, Properties)}
 */
@Deprecated
public FlinkKafkaProducer(String topicId, SerializationSchema<IN> serializationSchema, Properties producerConfig) {
    // the cast selects the FlinkKafkaPartitioner overload of the superclass constructor for the null argument
    super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), producerConfig, (FlinkKafkaPartitioner<IN>) null);
}
Example #24
Source File: FlinkKafkaProducer.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a producer by wrapping the given {@link SerializationSchema} in a
 * {@link KeyedSerializationSchemaWrapper}, building the producer properties from the broker
 * list via {@code getPropertiesFromBrokerList}, and delegating to the superclass constructor
 * with a {@code null} partitioner.
 *
 * @param brokerList broker list from which the producer properties are derived
 * @param topicId topic id, passed through to the superclass constructor
 * @param serializationSchema value-only schema that gets wrapped into a keyed schema
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, String, SerializationSchema)}
 */
@Deprecated
public FlinkKafkaProducer(String brokerList, String topicId, SerializationSchema<IN> serializationSchema) {
    // the cast selects the FlinkKafkaPartitioner overload of the superclass constructor for the null argument
    super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), getPropertiesFromBrokerList(brokerList), (FlinkKafkaPartitioner<IN>) null);
}
Example #25
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
protected void writeAppendSequence( String topicName, final int originalNumElements, final int numElementsToAppend, final int parallelism) throws Exception { LOG.info("\n===================================\n" + "== Appending sequence of " + numElementsToAppend + " into " + topicName + "==================================="); final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {}); final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema = new KeyedSerializationSchemaWrapper<>( new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig())); final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KafkaDeserializationSchemaWrapper<>( new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig())); // -------- Write the append sequence -------- StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment(); writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart()); writeEnv.getConfig().disableSysoutLogging(); DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() { private boolean running = true; @Override public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception { int cnt = originalNumElements; int partition = getRuntimeContext().getIndexOfThisSubtask(); while (running && cnt < numElementsToAppend + originalNumElements) { ctx.collect(new Tuple2<>(partition, cnt)); cnt++; } } @Override public void cancel() { running = false; } }).setParallelism(parallelism); // the producer must not produce duplicates Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings); producerProperties.setProperty("retries", "0"); producerProperties.putAll(secureProps); kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism)) 
.setParallelism(parallelism); try { writeEnv.execute("Write sequence"); } catch (Exception e) { throw new Exception("Failed to append sequence to Kafka; append job failed.", e); } LOG.info("Finished writing append sequence"); // we need to validate the sequence, because kafka's producers are not exactly once LOG.info("Validating sequence"); while (!getRunningJobs(client).isEmpty()){ Thread.sleep(50); } if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) { throw new Exception("Could not append a valid sequence to Kafka."); } }
Example #26
Source File: Kafka010ITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 *
 * <p>A first job emits the values {@code 0..1000} with event timestamp {@code value * 2} and writes
 * them, including timestamps, into a three-partition topic (partition = {@code value % 3}).
 * A second job reads the topic back, emits a watermark for every value divisible by 10 and passes
 * the stream through a {@code TimestampValidatingOperator}.
 *
 * @throws Exception if topic handling or either job fails
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

    final String topic = "tstopic";
    createTestTopic(topic, 3, 1);

    // ---------- Produce an event time stream into Kafka -------------------

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
        private static final long serialVersionUID = -2255105836471289626L;

        // volatile: cancel() may be called from a different thread than the one executing run()
        volatile boolean running = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            long i = 0;
            while (running) {
                ctx.collectWithTimestamp(i, i * 2);
                // stop after the element 1000 has been emitted
                if (i++ == 1000L) {
                    running = false;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
    FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
        private static final long serialVersionUID = -6730989584364230617L;

        @Override
        public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
            // spread the values over the three partitions of the topic
            return (int) (next % 3);
        }
    });
    prod.setParallelism(3);
    prod.setWriteTimestampToKafka(true);
    env.execute("Produce some");

    // ---------- Consume stream from Kafka -------------------

    env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
    kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
        private static final long serialVersionUID = -4834111073247835189L;

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
            if (lastElement % 10 == 0) {
                return new Watermark(lastElement);
            }
            return null;
        }

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            // keep the timestamp that is already attached to the record
            return previousElementTimestamp;
        }
    });

    DataStream<Long> stream = env.addSource(kafkaSource);
    GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
    stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

    env.execute("Consume again");

    deleteTestTopic(topic);
}
Example #27
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Test ensuring that the producer is not dropping buffered records; * we set a timeout because the test will not finish if the logic is broken. */ @SuppressWarnings("unchecked") @Test(timeout = 10000) public void testAtLeastOnceProducer() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); producer.setFlushOnCheckpoint(true); final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer(); final OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg-1")); testHarness.processElement(new StreamRecord<>("msg-2")); testHarness.processElement(new StreamRecord<>("msg-3")); verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class)); Assert.assertEquals(3, producer.getPendingSize()); // start a thread to perform checkpointing CheckedThread snapshotThread = new CheckedThread() { @Override public void go() throws Exception { // this should block until all records are flushed; // if the snapshot implementation returns before pending records are flushed, testHarness.snapshot(123L, 123L); } }; snapshotThread.start(); // before proceeding, make sure that flushing has started and that the snapshot is still blocked; // this would block forever if the snapshot didn't perform a flush producer.waitUntilFlushStarted(); Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive()); // now, complete the callbacks producer.getPendingCallbacks().get(0).onCompletion(null, null); Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive()); Assert.assertEquals(2, producer.getPendingSize()); producer.getPendingCallbacks().get(1).onCompletion(null, null); Assert.assertTrue("Snapshot 
returned before all records were flushed", snapshotThread.isAlive()); Assert.assertEquals(1, producer.getPendingSize()); producer.getPendingCallbacks().get(2).onCompletion(null, null); Assert.assertEquals(0, producer.getPendingSize()); // this would fail with an exception if flushing wasn't completed before the snapshot method returned snapshotThread.sync(); testHarness.close(); }
Example #28
Source File: KafkaITCase.java From flink with Apache License 2.0 | 4 votes |
/** * Kafka 20 specific test, ensuring Timestamps are properly written to and read from Kafka. */ @Test(timeout = 60000) public void testTimestamps() throws Exception { final String topic = "tstopic"; createTestTopic(topic, 3, 1); // ---------- Produce an event time stream into Kafka ------------------- StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() { private static final long serialVersionUID = -2255115836471289626L; boolean running = true; @Override public void run(SourceContext<Long> ctx) throws Exception { long i = 0; while (running) { ctx.collectWithTimestamp(i, i * 2); if (i++ == 1110L) { running = false; } } } @Override public void cancel() { running = false; } }); final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig()); FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() { private static final long serialVersionUID = -6730989584364230617L; @Override public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) { return (int) (next % 3); } })); prod.setWriteTimestampToKafka(true); streamWithTimestamps.addSink(prod).setParallelism(3); env.execute("Produce some"); // ---------- Consume stream from Kafka ------------------- env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); FlinkKafkaConsumer<Long> kafkaSource = new 
FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps); kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() { private static final long serialVersionUID = -4834111173247835189L; @Nullable @Override public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) { if (lastElement % 11 == 0) { return new Watermark(lastElement); } return null; } @Override public long extractTimestamp(Long element, long previousElementTimestamp) { return previousElementTimestamp; } }); DataStream<Long> stream = env.addSource(kafkaSource); GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class); stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1); env.execute("Consume again"); deleteTestTopic(topic); }
Example #29
Source File: Kafka011ITCase.java From flink with Apache License 2.0 | 4 votes |
/** * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka. */ @Test(timeout = 60000) public void testTimestamps() throws Exception { final String topic = "tstopic"; createTestTopic(topic, 3, 1); // ---------- Produce an event time stream into Kafka ------------------- StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() { private static final long serialVersionUID = -2255115836471289626L; boolean running = true; @Override public void run(SourceContext<Long> ctx) throws Exception { long i = 0; while (running) { ctx.collectWithTimestamp(i, i * 2); if (i++ == 1110L) { running = false; } } } @Override public void cancel() { running = false; } }); final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig()); FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() { private static final long serialVersionUID = -6730989584364230617L; @Override public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) { return (int) (next % 3); } })); prod.setWriteTimestampToKafka(true); streamWithTimestamps.addSink(prod).setParallelism(3); env.execute("Produce some"); // ---------- Consume stream from Kafka ------------------- env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); FlinkKafkaConsumer011<Long> kafkaSource = new 
FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps); kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() { private static final long serialVersionUID = -4834111173247835189L; @Nullable @Override public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) { if (lastElement % 11 == 0) { return new Watermark(lastElement); } return null; } @Override public long extractTimestamp(Long element, long previousElementTimestamp) { return previousElementTimestamp; } }); DataStream<Long> stream = env.addSource(kafkaSource); GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class); stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1); env.execute("Consume again"); deleteTestTopic(topic); }
Example #30
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Test ensuring that if an async exception is caught for one of the flushed requests on checkpoint, * it should be rethrown; we set a timeout because the test will not finish if the logic is broken. * * <p>Note that this test does not test the snapshot method is blocked correctly when there are pending records. * The test for that is covered in testAtLeastOnceProducer. */ @SuppressWarnings("unchecked") @Test(timeout = 5000) public void testAsyncErrorRethrownOnCheckpointAfterFlush() throws Throwable { final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>( FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null); producer.setFlushOnCheckpoint(true); final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer(); final OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer)); testHarness.open(); testHarness.processElement(new StreamRecord<>("msg-1")); testHarness.processElement(new StreamRecord<>("msg-2")); testHarness.processElement(new StreamRecord<>("msg-3")); verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class)); // only let the first callback succeed for now producer.getPendingCallbacks().get(0).onCompletion(null, null); CheckedThread snapshotThread = new CheckedThread() { @Override public void go() throws Exception { // this should block at first, since there are still two pending records that needs to be flushed testHarness.snapshot(123L, 123L); } }; snapshotThread.start(); // let the 2nd message fail with an async exception producer.getPendingCallbacks().get(1).onCompletion(null, new Exception("artificial async failure for 2nd message")); producer.getPendingCallbacks().get(2).onCompletion(null, null); try { snapshotThread.sync(); } catch (Exception e) { // the snapshot should have failed with the async exception 
Assert.assertTrue(e.getCause().getMessage().contains("artificial async failure for 2nd message")); // test succeeded return; } Assert.fail(); }