org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper Java Exaples

Source File: KafkaExample.java From flink with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled,
 * the snapshot method does indeed finishes without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(false);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg"));

	// make sure that all callbacks have not been completed
	verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

	// should return even if there are pending records
	testHarness.snapshot(123L, 123L);

	testHarness.close();
}

Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0

6 votes

/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled,
 * the snapshot method does indeed finishes without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(false);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg"));

	// make sure that all callbacks have not been completed
	verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

	// should return even if there are pending records
	testHarness.snapshot(123L, 123L);

	testHarness.close();
}

Source File: KafkaExample.java From Flink-CEPplus with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}

Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0

5 votes

/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}

Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0

5 votes

/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
	// no bootstrap servers set in props
	Properties props = new Properties();
	// should throw IllegalArgumentException
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}

Source File: KafkaTableSink.java From flink with Apache License 2.0

5 votes

@Override
protected SinkFunction<Row> createKafkaProducer(
	String topic,
	Properties properties,
	SerializationSchema<Row> serializationSchema,
	Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}

Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0

5 votes

/**
 * Tests that constructor defaults to key value serializers in config to byte array deserializers if not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
	Properties props = new Properties();
	props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
	// should set missing key value deserializers
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}

Source File: Kafka011TableSink.java From flink with Apache License 2.0

5 votes

@Override
protected SinkFunction<Row> createKafkaProducer(
		String topic,
		Properties properties,
		SerializationSchema<Row> serializationSchema,
		Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer011<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}

Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0

5 votes

/**
 * Tests that partitions list is determinate and correctly provided to custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
	FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

	RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
	when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
	when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

	// out-of-order list of 4 partitions
	List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
	producer.setRuntimeContext(mockRuntimeContext);

	final KafkaProducer mockProducer = producer.getMockKafkaProducer();
	when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
	when(mockProducer.metrics()).thenReturn(null);

	producer.open(new Configuration());
	verify(mockPartitioner, times(1)).open(0, 1);

	producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
	verify(mockPartitioner, times(1)).partition(
		"foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Tests that partitions list is determinate and correctly provided to custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
	FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

	RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
	when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
	when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

	// out-of-order list of 4 partitions
	List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
	producer.setRuntimeContext(mockRuntimeContext);

	final KafkaProducer mockProducer = producer.getMockKafkaProducer();
	when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
	when(mockProducer.metrics()).thenReturn(null);

	producer.open(new Configuration());
	verify(mockPartitioner, times(1)).open(0, 1);

	producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
	verify(mockPartitioner, times(1)).partition(
		"foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}

Source File: KafkaTableSink.java From Flink-CEPplus with Apache License 2.0

5 votes

@Override
protected SinkFunction<Row> createKafkaProducer(
	String topic,
	Properties properties,
	SerializationSchema<Row> serializationSchema,
	Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}

Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0

5 votes

/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}

Source File: Kafka011TableSink.java From Flink-CEPplus with Apache License 2.0

5 votes

@Override
protected SinkFunction<Row> createKafkaProducer(
		String topic,
		Properties properties,
		SerializationSchema<Row> serializationSchema,
		Optional<FlinkKafkaPartitioner<Row>> partitioner) {
	return new FlinkKafkaProducer011<>(
		topic,
		new KeyedSerializationSchemaWrapper<>(serializationSchema),
		properties,
		partitioner);
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
	// no bootstrap servers set in props
	Properties props = new Properties();
	// should throw IllegalArgumentException
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Tests that constructor defaults to key value serializers in config to byte array deserializers if not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
	Properties props = new Properties();
	props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
	// should set missing key value deserializers
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}

Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0

4 votes

public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}

Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0

4 votes

protected String writeSequence(
		String baseTopicName,
		final int numElements,
		final int parallelism,
		final int replicationFactor) throws Exception {
	LOG.info("\n===================================\n" +
			"== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
			"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
			TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
			new KeyedSerializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
			new KafkaDeserializationSchemaWrapper<>(
					new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final int maxNumAttempts = 10;

	for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

		final String topicName = baseTopicName + '-' + attempt;

		LOG.info("Writing attempt #" + attempt);

		// -------- Write the Sequence --------

		createTestTopic(topicName, parallelism, replicationFactor);

		StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
		writeEnv.getConfig().disableSysoutLogging();

		DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

			private boolean running = true;

			@Override
			public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
				int cnt = 0;
				int partition = getRuntimeContext().getIndexOfThisSubtask();

				while (running && cnt < numElements) {
					ctx.collect(new Tuple2<>(partition, cnt));
					cnt++;
				}
			}

			@Override
			public void cancel() {
				running = false;
			}
		}).setParallelism(parallelism);

		// the producer must not produce duplicates
		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
		producerProperties.setProperty("retries", "0");
		producerProperties.putAll(secureProps);

		kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
				.setParallelism(parallelism);

		try {
			writeEnv.execute("Write sequence");
		}
		catch (Exception e) {
			LOG.error("Write attempt failed, trying again", e);
			deleteTestTopic(topicName);
			waitUntilNoJobIsRunning(client);
			continue;
		}

		LOG.info("Finished writing sequence");

		// -------- Validate the Sequence --------

		// we need to validate the sequence, because kafka's producers are not exactly once
		LOG.info("Validating sequence");

		waitUntilNoJobIsRunning(client);

		if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
			// everything is good!
			return topicName;
		}
		else {
			deleteTestTopic(topicName);
			// fall through the loop
		}
	}

	throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}

Source File: KafkaProducerTestBase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * This test sets KafkaProducer so that it will  automatically flush the data and
 * and fails the broker to check whether flushed records since last checkpoint were not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
	final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
	final int partition = 0;
	final int numElements = 1000;
	final int failAfterElements = 333;

	for (int i = 0; i < sinksCount; i++) {
		createTestTopic(topic + i, 1, 1);
	}

	TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
	KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(500);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
	env.getConfig().disableSysoutLogging();

	Properties properties = new Properties();
	properties.putAll(standardProps);
	properties.putAll(secureProps);

	// process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application
	List<Integer> expectedElements = getIntegersSequence(numElements);

	DataStream<Integer> inputStream = env
		.addSource(new IntegerSource(numElements))
		.map(new FailingIdentityMapper<Integer>(failAfterElements));

	for (int i = 0; i < sinksCount; i++) {
		FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
			@Override
			public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
				return partition;
			}
		};

		if (regularSink) {
			StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner);
			inputStream.addSink(kafkaSink.getUserFunction());
		} else {
			kafkaServer.produceIntoKafka(inputStream, topic + i, keyedSerializationSchema, properties, partitioner);
		}
	}

	FailingIdentityMapper.failedBefore = false;
	TestUtils.tryExecute(env, "Exactly once test");

	for (int i = 0; i < sinksCount; i++) {
		// assert that before failure we successfully snapshot/flushed all expected elements
		assertExactlyOnceForTopic(
			properties,
			topic + i,
			partition,
			expectedElements,
			KAFKA_READ_TIMEOUT);
		deleteTestTopic(topic + i);
	}
}

Source File: FlinkKafkaProducer.java From flink with Apache License 2.0

4 votes

/**
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, SerializationSchema, Properties, KafkaPartitioner)}
 */
@Deprecated
public FlinkKafkaProducer(String topicId, SerializationSchema<IN> serializationSchema, Properties producerConfig, KafkaPartitioner customPartitioner) {
	super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), producerConfig, customPartitioner);

}

Source File: FlinkKafkaProducer.java From flink with Apache License 2.0

4 votes

/**
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, SerializationSchema, Properties)}
 */
@Deprecated
public FlinkKafkaProducer(String topicId, SerializationSchema<IN> serializationSchema, Properties producerConfig) {
	super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), producerConfig, (FlinkKafkaPartitioner<IN>) null);
}

Source File: FlinkKafkaProducer.java From flink with Apache License 2.0

4 votes

/**
 * @deprecated Use {@link FlinkKafkaProducer08#FlinkKafkaProducer08(String, String, SerializationSchema)}
 */
@Deprecated
public FlinkKafkaProducer(String brokerList, String topicId, SerializationSchema<IN> serializationSchema) {
	super(topicId, new KeyedSerializationSchemaWrapper<>(serializationSchema), getPropertiesFromBrokerList(brokerList), (FlinkKafkaPartitioner<IN>) null);
}

Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0

4 votes

protected void writeAppendSequence(
		String topicName,
		final int originalNumElements,
		final int numElementsToAppend,
		final int parallelism) throws Exception {

	LOG.info("\n===================================\n" +
		"== Appending sequence of " + numElementsToAppend + " into " + topicName +
		"===================================");

	final TypeInformation<Tuple2<Integer, Integer>> resultType =
		TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KafkaDeserializationSchemaWrapper<>(
			new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// -------- Write the append sequence --------

	StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	writeEnv.getConfig().disableSysoutLogging();

	DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			int cnt = originalNumElements;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && cnt < numElementsToAppend + originalNumElements) {
				ctx.collect(new Tuple2<>(partition, cnt));
				cnt++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	// the producer must not produce duplicates
	Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
	producerProperties.setProperty("retries", "0");
	producerProperties.putAll(secureProps);

	kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
		.setParallelism(parallelism);

	try {
		writeEnv.execute("Write sequence");
	}
	catch (Exception e) {
		throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
	}

	LOG.info("Finished writing append sequence");

	// we need to validate the sequence, because kafka's producers are not exactly once
	LOG.info("Validating sequence");
	while (!getRunningJobs(client).isEmpty()){
		Thread.sleep(50);
	}

	if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
		throw new Exception("Could not append a valid sequence to Kafka.");
	}
}

Source File: Kafka010ITCase.java From flink with Apache License 2.0

4 votes

/**
 * Kafka 0.10 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Ignore("This test is disabled because of: https://issues.apache.org/jira/browse/FLINK-9217")
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255105836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1000L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer010.FlinkKafkaProducer010Configuration prod = FlinkKafkaProducer010.writeToKafkaWithTimestamps(streamWithTimestamps, topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	});
	prod.setParallelism(3);
	prod.setWriteTimestampToKafka(true);
	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer010<Long> kafkaSource = new FlinkKafkaConsumer010<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111073247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 10 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Test ensuring that the producer is not dropping buffered records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testAtLeastOnceProducer() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(true);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));
	testHarness.processElement(new StreamRecord<>("msg-2"));
	testHarness.processElement(new StreamRecord<>("msg-3"));

	verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));
	Assert.assertEquals(3, producer.getPendingSize());

	// start a thread to perform checkpointing
	CheckedThread snapshotThread = new CheckedThread() {
		@Override
		public void go() throws Exception {
			// this should block until all records are flushed;
			// if the snapshot implementation returns before pending records are flushed,
			testHarness.snapshot(123L, 123L);
		}
	};
	snapshotThread.start();

	// before proceeding, make sure that flushing has started and that the snapshot is still blocked;
	// this would block forever if the snapshot didn't perform a flush
	producer.waitUntilFlushStarted();
	Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());

	// now, complete the callbacks
	producer.getPendingCallbacks().get(0).onCompletion(null, null);
	Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
	Assert.assertEquals(2, producer.getPendingSize());

	producer.getPendingCallbacks().get(1).onCompletion(null, null);
	Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
	Assert.assertEquals(1, producer.getPendingSize());

	producer.getPendingCallbacks().get(2).onCompletion(null, null);
	Assert.assertEquals(0, producer.getPendingSize());

	// this would fail with an exception if flushing wasn't completed before the snapshot method returned
	snapshotThread.sync();

	testHarness.close();
}

Source File: KafkaITCase.java From flink with Apache License 2.0

4 votes

/**
 * Kafka 20 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer<Long> kafkaSource = new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}

Source File: Kafka011ITCase.java From flink with Apache License 2.0

4 votes

/**
 * Kafka 0.11 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {

	final String topic = "tstopic";
	createTestTopic(topic, 3, 1);

	// ---------- Produce an event time stream into Kafka -------------------

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
		private static final long serialVersionUID = -2255115836471289626L;
		boolean running = true;

		@Override
		public void run(SourceContext<Long> ctx) throws Exception {
			long i = 0;
			while (running) {
				ctx.collectWithTimestamp(i, i * 2);
				if (i++ == 1110L) {
					running = false;
				}
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	});

	final TypeInformationSerializationSchema<Long> longSer = new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
	FlinkKafkaProducer011<Long> prod = new FlinkKafkaProducer011<>(topic, new KeyedSerializationSchemaWrapper<>(longSer), standardProps, Optional.of(new FlinkKafkaPartitioner<Long>() {
		private static final long serialVersionUID = -6730989584364230617L;

		@Override
		public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
			return (int) (next % 3);
		}
	}));
	prod.setWriteTimestampToKafka(true);

	streamWithTimestamps.addSink(prod).setParallelism(3);

	env.execute("Produce some");

	// ---------- Consume stream from Kafka -------------------

	env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.getConfig().disableSysoutLogging();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	FlinkKafkaConsumer011<Long> kafkaSource = new FlinkKafkaConsumer011<>(topic, new LimitedLongDeserializer(), standardProps);
	kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
		private static final long serialVersionUID = -4834111173247835189L;

		@Nullable
		@Override
		public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
			if (lastElement % 11 == 0) {
				return new Watermark(lastElement);
			}
			return null;
		}

		@Override
		public long extractTimestamp(Long element, long previousElementTimestamp) {
			return previousElementTimestamp;
		}
	});

	DataStream<Long> stream = env.addSource(kafkaSource);
	GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
	stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator()).setParallelism(1);

	env.execute("Consume again");

	deleteTestTopic(topic);
}

Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Test ensuring that if an async exception is caught for one of the flushed requests on checkpoint,
 * it should be rethrown; we set a timeout because the test will not finish if the logic is broken.
 *
 * <p>Note that this test does not test the snapshot method is blocked correctly when there are pending records.
 * The test for that is covered in testAtLeastOnceProducer.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testAsyncErrorRethrownOnCheckpointAfterFlush() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(true);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));
	testHarness.processElement(new StreamRecord<>("msg-2"));
	testHarness.processElement(new StreamRecord<>("msg-3"));

	verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));

	// only let the first callback succeed for now
	producer.getPendingCallbacks().get(0).onCompletion(null, null);

	CheckedThread snapshotThread = new CheckedThread() {
		@Override
		public void go() throws Exception {
			// this should block at first, since there are still two pending records that needs to be flushed
			testHarness.snapshot(123L, 123L);
		}
	};
	snapshotThread.start();

	// let the 2nd message fail with an async exception
	producer.getPendingCallbacks().get(1).onCompletion(null, new Exception("artificial async failure for 2nd message"));
	producer.getPendingCallbacks().get(2).onCompletion(null, null);

	try {
		snapshotThread.sync();
	} catch (Exception e) {
		// the snapshot should have failed with the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async failure for 2nd message"));

		// test succeeded
		return;
	}

	Assert.fail();
}

org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper Java Examples