org.apache.flink.util.PropertiesUtil Java Examples
The following examples show how to use
org.apache.flink.util.PropertiesUtil.
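Before the individual examples, here is a minimal, self-contained sketch of the PropertiesUtil helpers they rely on: getInt, getLong and getBoolean read a typed value from a Properties object and fall back to a default when the key is absent, while flatten copies nested default properties into first-level entries. The property keys and values below are made up purely for illustration.

import java.util.Properties;

import org.apache.flink.util.PropertiesUtil;

public class PropertiesUtilSketch {

    public static void main(String[] args) {
        Properties defaults = new Properties();
        defaults.setProperty("fetch.size", "500");             // hypothetical key, lives only in the defaults

        Properties config = new Properties(defaults);
        config.setProperty("poll.interval.ms", "1000");        // hypothetical key

        // Typed lookups that fall back to the given default when the key is absent.
        int fetchSize = PropertiesUtil.getInt(config, "fetch.size", 100);             // 500 (from the defaults)
        long pollInterval = PropertiesUtil.getLong(config, "poll.interval.ms", 5000); // 1000
        boolean enabled = PropertiesUtil.getBoolean(config, "feature.enabled", true); // true (key missing)

        // flatten() turns nested default properties into plain first-level entries,
        // which matters for code that treats Properties as a plain HashMap.
        Properties flattened = PropertiesUtil.flatten(config);

        System.out.println(fetchSize + " " + pollInterval + " " + enabled + " " + flattened.stringPropertyNames());
    }
}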
Example #1
Source File: FlinkKafkaShuffleProducer.java From flink with Apache License 2.0 | 6 votes |
FlinkKafkaShuffleProducer(
        String defaultTopicId,
        TypeSerializer<IN> typeSerializer,
        Properties props,
        KeySelector<IN, KEY> keySelector,
        Semantic semantic,
        int kafkaProducersPoolSize) {
    super(defaultTopicId, (element, timestamp) -> null, props, semantic, kafkaProducersPoolSize);
    this.kafkaSerializer = new KafkaSerializer<>(typeSerializer);
    this.keySelector = keySelector;

    Preconditions.checkArgument(
        props.getProperty(PARTITION_NUMBER) != null,
        "Missing partition number for Kafka Shuffle");
    numberOfPartitions = PropertiesUtil.getInt(props, PARTITION_NUMBER, Integer.MIN_VALUE);
}
Example #2
Source File: FlinkKafkaConsumer08.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override
protected AbstractFetcher<T, ?> createFetcher(
        SourceContext<T> sourceContext,
        Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
        SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
        SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
        StreamingRuntimeContext runtimeContext,
        OffsetCommitMode offsetCommitMode,
        MetricGroup consumerMetricGroup,
        boolean useMetrics) throws Exception {

    long autoCommitInterval = (offsetCommitMode == OffsetCommitMode.KAFKA_PERIODIC)
            ? PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000)
            : -1; // this disables the periodic offset committer thread in the fetcher

    return new Kafka08Fetcher<>(
            sourceContext,
            assignedPartitionsWithInitialOffsets,
            watermarksPeriodic,
            watermarksPunctuated,
            runtimeContext,
            deserializer,
            kafkaProperties,
            autoCommitInterval,
            consumerMetricGroup,
            useMetrics);
}
Example #3
Source File: KafkaShuffleITCase.java From flink with Apache License 2.0 | 5 votes |
private Map<Integer, Collection<ConsumerRecord<byte[], byte[]>>> testKafkaShuffleProducer(
        String topic,
        StreamExecutionEnvironment env,
        int numberOfPartitions,
        int producerParallelism,
        int numElementsPerProducer,
        TimeCharacteristic timeCharacteristic) throws Exception {
    createTestTopic(topic, numberOfPartitions, 1);

    env.setParallelism(producerParallelism);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(timeCharacteristic);

    DataStream<Tuple3<Integer, Long, Integer>> source =
        env.addSource(new KafkaSourceFunction(numElementsPerProducer, false)).setParallelism(producerParallelism);
    DataStream<Tuple3<Integer, Long, Integer>> input = (timeCharacteristic == EventTime) ?
        source.assignTimestampsAndWatermarks(new PunctuatedExtractor()).setParallelism(producerParallelism) : source;

    Properties properties = kafkaServer.getStandardProperties();
    Properties kafkaProperties = PropertiesUtil.flatten(properties);
    kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
    kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));
    kafkaProperties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    kafkaProperties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    FlinkKafkaShuffle.writeKeyBy(input, topic, kafkaProperties, 0);

    env.execute("Write to " + topic);

    ImmutableMap.Builder<Integer, Collection<ConsumerRecord<byte[], byte[]>>> results = ImmutableMap.builder();
    for (int p = 0; p < numberOfPartitions; p++) {
        results.put(p, kafkaServer.getAllRecordsFromTopic(kafkaProperties, topic, p, 5000));
    }
    deleteTestTopic(topic);

    return results.build();
}
Example #4
Source File: FlinkKafkaConsumer08.java From flink with Apache License 2.0 | 5 votes |
@Override
protected AbstractFetcher<T, ?> createFetcher(
        SourceContext<T> sourceContext,
        Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
        SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
        SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
        StreamingRuntimeContext runtimeContext,
        OffsetCommitMode offsetCommitMode,
        MetricGroup consumerMetricGroup,
        boolean useMetrics) throws Exception {

    long autoCommitInterval = (offsetCommitMode == OffsetCommitMode.KAFKA_PERIODIC)
            ? PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000)
            : -1; // this disables the periodic offset committer thread in the fetcher

    return new Kafka08Fetcher<>(
            sourceContext,
            assignedPartitionsWithInitialOffsets,
            watermarksPeriodic,
            watermarksPunctuated,
            runtimeContext,
            deserializer,
            kafkaProperties,
            autoCommitInterval,
            consumerMetricGroup,
            useMetrics);
}
Example #5
Source File: FlinkKafkaShuffleConsumer.java From flink with Apache License 2.0 | 5 votes |
FlinkKafkaShuffleConsumer(
        String topic,
        TypeInformationSerializationSchema<T> schema,
        TypeSerializer<T> typeSerializer,
        Properties props) {
    // The schema is needed to call the right FlinkKafkaConsumer constructor.
    // It is never used, can be `null`, but `null` confuses the compiler.
    super(topic, schema, props);
    this.typeSerializer = typeSerializer;

    Preconditions.checkArgument(
        props.getProperty(PRODUCER_PARALLELISM) != null,
        "Missing producer parallelism for Kafka Shuffle");
    producerParallelism = PropertiesUtil.getInt(props, PRODUCER_PARALLELISM, Integer.MAX_VALUE);
}
Example #6
Source File: FlinkKafkaConsumer010.java From flink with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
        PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
Example #7
Source File: FlinkKafkaConsumer09.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
        PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
Example #8
Source File: FlinkKafkaConsumer.java From flink with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
        PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
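For context, this check treats offset auto-commit as enabled only when enable.auto.commit is true (the default) and the configured commit interval is positive. Below is a small, self-contained illustration of the same two PropertiesUtil lookups, using made-up property values:

import java.util.Properties;

import org.apache.flink.util.PropertiesUtil;
import org.apache.kafka.clients.consumer.ConsumerConfig;

public class AutoCommitCheckSketch {

    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
        properties.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "0"); // interval 0 => effectively disabled

        boolean autoCommitEnabled =
            PropertiesUtil.getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true)
                && PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;

        System.out.println(autoCommitEnabled); // false: the flag is set but the interval is not positive
    }
}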
Example #9
Source File: FlinkKafkaShuffle.java From flink with Apache License 2.0 | 4 votes |
/**
 * The read side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>Each consumer task should read kafka partitions equal to the key group indices it is assigned.
 * The number of kafka partitions is the maximum parallelism of the consumer.
 * This version only supports numberOfPartitions = consumerParallelism.
 * In the case of using {@link TimeCharacteristic#EventTime}, a consumer task is responsible to emit
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after receiving at least one watermark from each producer task to make sure watermarks
 * are monotonically increasing. Hence a consumer task needs to know `producerParallelism` as well.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#writeKeyBy
 *
 * @param topic The topic of Kafka where data is persisted
 * @param env Execution environment. readKeyBy's environment can be different from writeKeyBy's
 * @param typeInformation Type information of the data persisted in Kafka
 * @param kafkaProperties kafka properties for Kafka Consumer
 * @param keySelector key selector to retrieve key
 * @param <T> Schema type
 * @param <K> Key type
 * @return Keyed data stream
 */
public static <T, K> KeyedStream<T, K> readKeyBy(
        String topic,
        StreamExecutionEnvironment env,
        TypeInformation<T> typeInformation,
        Properties kafkaProperties,
        KeySelector<T, K> keySelector) {

    TypeSerializer<T> typeSerializer = typeInformation.createSerializer(env.getConfig());
    TypeInformationSerializationSchema<T> schema =
        new TypeInformationSerializationSchema<>(typeInformation, typeSerializer);

    SourceFunction<T> kafkaConsumer =
        new FlinkKafkaShuffleConsumer<>(topic, schema, typeSerializer, kafkaProperties);

    // TODO: consider situations where numberOfPartitions != consumerParallelism
    Preconditions.checkArgument(
        kafkaProperties.getProperty(PARTITION_NUMBER) != null,
        "Missing partition number for Kafka Shuffle");
    int numberOfPartitions = PropertiesUtil.getInt(kafkaProperties, PARTITION_NUMBER, Integer.MIN_VALUE);
    DataStream<T> outputDataStream = env.addSource(kafkaConsumer).setParallelism(numberOfPartitions);

    return DataStreamUtils.reinterpretAsKeyedStream(outputDataStream, keySelector);
}
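As the Javadoc stresses, readKeyBy expects kafkaProperties to already contain the PRODUCER_PARALLELISM and PARTITION_NUMBER entries. The sketch below shows what a caller might look like; the topic name, broker address, parallelism values and key selector are invented, and it assumes the two property-key constants are visible from the calling code (otherwise persistentKeyBy, which fills them in itself, is the simpler entry point).

import java.util.Properties;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class ReadKeyBySketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProperties = new Properties();
        kafkaProperties.setProperty("bootstrap.servers", "localhost:9092");        // assumed broker address
        // Both keys are mandatory for the shuffle consumer; constant visibility is assumed here.
        kafkaProperties.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "2");  // must match the write side
        kafkaProperties.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "4");      // partitions of the shuffle topic

        KeySelector<Tuple2<Integer, String>, Integer> keySelector = value -> value.f0;

        // Consume a shuffle that some writeKeyBy / persistentKeyBy job persisted to "shuffle-topic".
        KeyedStream<Tuple2<Integer, String>, Integer> keyed = FlinkKafkaShuffle.readKeyBy(
            "shuffle-topic",
            env,
            TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {}),
            kafkaProperties,
            keySelector);

        keyed.print();
        env.execute("read keyBy shuffle");
    }
}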
Example #10
Source File: FlinkKafkaShuffle.java From flink with Apache License 2.0 | 4 votes |
/**
 * Uses Kafka as a message bus to persist keyBy shuffle.
 *
 * <p>Persisting keyBy shuffle is achieved by wrapping a {@link FlinkKafkaShuffleProducer} and
 * {@link FlinkKafkaShuffleConsumer} together.
 *
 * <p>On the producer side, {@link FlinkKafkaShuffleProducer}
 * is similar to {@link DataStream#keyBy(KeySelector)}. They use the same key group assignment function
 * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes.
 * Hence, each producer task can potentially write to each Kafka partition based on where the key goes.
 * Here, `numberOfPartitions` equals to the key group size.
 * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts its watermark
 * to ALL of the Kafka partitions to make sure watermark information is propagated correctly.
 *
 * <p>On the consumer side, each consumer task should read partitions equal to the key group indices
 * it is assigned. `numberOfPartitions` is the maximum parallelism of the consumer. This version only
 * supports numberOfPartitions = consumerParallelism.
 * In the case of using {@link TimeCharacteristic#EventTime}, a consumer task is responsible to emit
 * watermarks. Watermarks are read from the corresponding Kafka partitions. Notice that a consumer task only starts
 * to emit a watermark after reading at least one watermark from each producer task to make sure watermarks
 * are monotonically increasing. Hence a consumer task needs to know `producerParallelism` as well.
 *
 * @see FlinkKafkaShuffle#writeKeyBy
 * @see FlinkKafkaShuffle#readKeyBy
 *
 * @param dataStream Data stream to be shuffled
 * @param topic Kafka topic written to
 * @param producerParallelism Parallelism of producer
 * @param numberOfPartitions Number of partitions
 * @param properties Kafka properties
 * @param keySelector Key selector to retrieve key from `dataStream'
 * @param <T> Type of the input data stream
 * @param <K> Type of key
 */
public static <T, K> KeyedStream<T, K> persistentKeyBy(
        DataStream<T> dataStream,
        String topic,
        int producerParallelism,
        int numberOfPartitions,
        Properties properties,
        KeySelector<T, K> keySelector) {
    // KafkaProducer#propsToMap uses Properties purely as a HashMap without considering the default properties
    // So we have to flatten the default property to first level elements.
    Properties kafkaProperties = PropertiesUtil.flatten(properties);
    kafkaProperties.setProperty(PRODUCER_PARALLELISM, String.valueOf(producerParallelism));
    kafkaProperties.setProperty(PARTITION_NUMBER, String.valueOf(numberOfPartitions));

    StreamExecutionEnvironment env = dataStream.getExecutionEnvironment();
    writeKeyBy(dataStream, topic, kafkaProperties, keySelector);
    return readKeyBy(topic, env, dataStream.getType(), kafkaProperties, keySelector);
}
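Since persistentKeyBy flattens the properties and adds PRODUCER_PARALLELISM and PARTITION_NUMBER itself, a caller only needs ordinary Kafka settings. A hypothetical end-to-end usage might look like the sketch below; the topic name, broker address, parallelism values and data are all invented.

import java.util.Properties;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class PersistentKeyBySketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<Integer, String>> source =
            env.fromElements(Tuple2.of(1, "a"), Tuple2.of(2, "b"), Tuple2.of(1, "c"));

        Properties kafkaProperties = new Properties();
        kafkaProperties.setProperty("bootstrap.servers", "localhost:9092"); // assumed broker address

        KeySelector<Tuple2<Integer, String>, Integer> keySelector = value -> value.f0;

        // producerParallelism = 2, numberOfPartitions = 4; the call writes the stream to
        // "shuffle-topic" and immediately reads it back as a keyed stream.
        KeyedStream<Tuple2<Integer, String>, Integer> keyed = FlinkKafkaShuffle.persistentKeyBy(
            source, "shuffle-topic", 2, 4, kafkaProperties, keySelector);

        keyed.print();
        env.execute("persistent keyBy through Kafka");
    }
}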
Example #11
Source File: LogUtil.java From aliyun-log-flink-connector with Apache License 2.0 | 4 votes |
public static long getFetchIntervalMillis(Properties properties) {
    return PropertiesUtil.getLong(properties,
            ConfigConstants.LOG_FETCH_DATA_INTERVAL_MILLIS,
            Consts.DEFAULT_FETCH_INTERVAL_MILLIS);
}
Example #12
Source File: LogUtil.java From aliyun-log-flink-connector with Apache License 2.0 | 4 votes |
public static int getNumberPerFetch(Properties properties) {
    return PropertiesUtil.getInt(properties,
            ConfigConstants.LOG_MAX_NUMBER_PER_FETCH,
            Consts.DEFAULT_NUMBER_PER_FETCH);
}
Example #13
Source File: LogUtil.java From aliyun-log-flink-connector with Apache License 2.0 | 4 votes |
public static long getCommitIntervalMs(Properties props) {
    return PropertiesUtil.getLong(props,
            ConfigConstants.LOG_COMMIT_INTERVAL_MILLIS,
            Consts.DEFAULT_COMMIT_INTERVAL_MILLIS);
}
Example #14
Source File: LogUtil.java From aliyun-log-flink-connector with Apache License 2.0 | 4 votes |
public static long getDiscoveryIntervalMs(Properties props) {
    return PropertiesUtil.getLong(props,
            ConfigConstants.LOG_SHARDS_DISCOVERY_INTERVAL_MILLIS,
            Consts.DEFAULT_SHARDS_DISCOVERY_INTERVAL_MILLIS);
}
Example #15
Source File: ConfigWrapper.java From aliyun-log-flink-connector with Apache License 2.0 | 4 votes |
public long getLong(String key, long defaultValue) {
    return PropertiesUtil.getLong(props, key, defaultValue);
}
Example #16
Source File: ConfigWrapper.java From aliyun-log-flink-connector with Apache License 2.0 | 4 votes |
public int getInt(String key, int defaultValue) {
    return PropertiesUtil.getInt(props, key, defaultValue);
}
Example #17
Source File: FlinkKafkaConsumer.java From flink with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
        PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
Example #18
Source File: FlinkKafkaConsumer08.java From flink with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return PropertiesUtil.getBoolean(kafkaProperties, "auto.commit.enable", true) &&
        PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000) > 0;
}
Example #19
Source File: FlinkKafkaConsumer09.java From flink with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
        PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
Example #20
Source File: FlinkKafkaConsumer.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return getBoolean(properties, ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true) &&
        PropertiesUtil.getLong(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000) > 0;
}
Example #21
Source File: FlinkKafkaConsumer08.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override
protected boolean getIsAutoCommitEnabled() {
    return PropertiesUtil.getBoolean(kafkaProperties, "auto.commit.enable", true) &&
        PropertiesUtil.getLong(kafkaProperties, "auto.commit.interval.ms", 60000) > 0;
}
Example #22
Source File: FlinkKafkaShuffle.java From flink with Apache License 2.0 | 3 votes |
/**
 * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in Kafka.
 * {@link FlinkKafkaShuffleProducer} uses the same key group assignment function
 * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes.
 * Hence, each producer task can potentially write to each Kafka partition based on the key.
 * Here, the number of partitions equals to the key group size.
 * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts each watermark
 * to all of the Kafka partitions to make sure watermark information is propagated properly.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#readKeyBy
 *
 * @param dataStream Data stream to be shuffled
 * @param topic Kafka topic written to
 * @param kafkaProperties Kafka properties for Kafka Producer
 * @param keySelector Key selector to retrieve key from `dataStream'
 * @param <T> Type of the input data stream
 * @param <K> Type of key
 */
public static <T, K> void writeKeyBy(
        DataStream<T> dataStream,
        String topic,
        Properties kafkaProperties,
        KeySelector<T, K> keySelector) {

    StreamExecutionEnvironment env = dataStream.getExecutionEnvironment();
    TypeSerializer<T> typeSerializer = dataStream.getType().createSerializer(env.getConfig());

    // write data to Kafka
    FlinkKafkaShuffleProducer<T, K> kafkaProducer = new FlinkKafkaShuffleProducer<>(
        topic,
        typeSerializer,
        kafkaProperties,
        env.clean(keySelector),
        FlinkKafkaProducer.Semantic.EXACTLY_ONCE,
        FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE);

    // make sure the sink parallelism is set to producerParallelism
    Preconditions.checkArgument(
        kafkaProperties.getProperty(PRODUCER_PARALLELISM) != null,
        "Missing producer parallelism for Kafka Shuffle");
    int producerParallelism = PropertiesUtil.getInt(kafkaProperties, PRODUCER_PARALLELISM, Integer.MIN_VALUE);

    addKafkaShuffle(dataStream, kafkaProducer, producerParallelism);
}
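Below is the write-side counterpart of the readKeyBy sketch shown earlier. Again the topic name, broker address and parallelism values are invented, and visibility of the two property-key constants is assumed; writeKeyBy itself reads PRODUCER_PARALLELISM to fix the sink parallelism.

import java.util.Properties;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.shuffle.FlinkKafkaShuffle;

public class WriteKeyBySketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<Integer, String>> input =
            env.fromElements(Tuple2.of(1, "a"), Tuple2.of(2, "b"));

        Properties kafkaProperties = new Properties();
        kafkaProperties.setProperty("bootstrap.servers", "localhost:9092");        // assumed broker address
        kafkaProperties.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "2");  // sink parallelism, required
        kafkaProperties.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "4");      // required by the shuffle producer

        KeySelector<Tuple2<Integer, String>, Integer> keySelector = value -> value.f0;
        FlinkKafkaShuffle.writeKeyBy(input, "shuffle-topic", kafkaProperties, keySelector);

        env.execute("write keyBy shuffle");
    }
}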