org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction Java Examples
The following examples show how to use
org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction.
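Before the examples, here is a minimal, self-contained sketch (the class name, record type, and parallelism are illustrative assumptions, not taken from any project below) of how a RichParallelSourceFunction is typically implemented: each parallel subtask executes run() until cancel() flips a volatile flag, and getRuntimeContext() exposes the subtask index.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

// Illustrative sketch only; names and emitted records are assumptions.
public class CountingParallelSource extends RichParallelSourceFunction<Tuple2<Integer, Long>> {

  // volatile, because cancel() is invoked from a different thread than run()
  private volatile boolean running = true;

  @Override
  public void run(SourceContext<Tuple2<Integer, Long>> ctx) throws Exception {
    // each parallel subtask tags its records with its own subtask index
    int subtask = getRuntimeContext().getIndexOfThisSubtask();
    long counter = 0;
    while (running) {
      // hold the checkpoint lock while emitting so checkpoints see a consistent state
      synchronized (ctx.getCheckpointLock()) {
        ctx.collect(new Tuple2<>(subtask, counter++));
      }
      Thread.sleep(10);
    }
  }

  @Override
  public void cancel() {
    running = false;
  }

  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Integer, Long>> stream =
        env.addSource(new CountingParallelSource(), "CountingParallelSource").setParallelism(2);
    stream.print();
    env.execute("RichParallelSourceFunction sketch");
  }
}

The anonymous-class sources in the test examples below follow the same run()/cancel() contract, just inlined at the addSource() call site.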
Example #1
Source File: AdvertisingTopologyFlinkWindows.java From yahoo-streaming-benchmark with Apache License 2.0 | 6 votes |
/**
 * Choose source - either Kafka or data generator
 */
private static DataStream<String> streamSource(BenchmarkConfig config, StreamExecutionEnvironment env) {
  // Choose a source -- either local generator or Kafka
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    EventGeneratorSource eventGenerator = new EventGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
    Map<String, List<String>> campaigns = eventGenerator.getCampaigns();
    RedisHelper redisHelper = new RedisHelper(config);
    redisHelper.prepareRedis(campaigns);
    redisHelper.writeCampaignFile(campaigns);
  } else {
    source = kafkaSource(config);
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #2
Source File: AdvertisingTopologyFlinkState.java From yahoo-streaming-benchmark with Apache License 2.0 | 6 votes |
/**
 * Choose source - either Kafka or data generator
 */
private static DataStream<String> sourceStream(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    EventGeneratorSource eventGenerator = new EventGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
    prepareRedis(config, eventGenerator);
  } else {
    source = kafkaSource(config);
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #3
Source File: AdvertisingTopologyRedisDirect.java From yahoo-streaming-benchmark with Apache License 2.0 | 5 votes |
/**
 * Choose either Kafka or data generator as source
 */
private static DataStream<String> sourceStream(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    HighKeyCardinalityGeneratorSource eventGenerator = new HighKeyCardinalityGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
  } else {
    source = new FlinkKafkaConsumer082<>(config.kafkaTopic, new SimpleStringSchema(), config.getParameters().getProperties());
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #4
Source File: Kafka011SourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
  FlinkKafkaConsumer011<Row> consumer;
  if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
    Pattern pattern = Pattern.compile(topicPattern);
    consumer = new FlinkKafkaConsumer011<Row>(pattern, new MessageDeserialization(), properties);
  } else {
    consumer = new FlinkKafkaConsumer011<Row>(topic, new MessageDeserialization(), properties);
  }
  switch (super.startupMode) {
    case LATEST: {
      consumer.setStartFromLatest();
      break;
    }
    case EARLIEST: {
      consumer.setStartFromEarliest();
      break;
    }
    case GROUP_OFFSETS: {
      consumer.setStartFromGroupOffsets();
      break;
    }
    case TIMESTAMP: {
      consumer.setStartFromTimestamp(startTimeMs);
      break;
    }
    default: {
      throw new IllegalArgumentException("invalid startupMode.");
    }
  }
  return consumer;
}
Example #5
Source File: AdvertisingTopologyFlinkStateHighKeyCard.java From yahoo-streaming-benchmark with Apache License 2.0 | 5 votes |
/**
 * Choose data source, either Kafka or data generator
 */
private static DataStream<String> streamSource(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    HighKeyCardinalityGeneratorSource eventGenerator = new HighKeyCardinalityGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
  } else {
    source = kafkaSource(config);
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #6
Source File: KafkaSourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
  FlinkKafkaConsumer<Row> consumer;
  if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
    Pattern pattern = Pattern.compile(topicPattern);
    consumer = new FlinkKafkaConsumer<Row>(pattern, new MessageDeserialization(), properties);
  } else {
    consumer = new FlinkKafkaConsumer<Row>(topic, new MessageDeserialization(), properties);
  }
  switch (super.startupMode) {
    case LATEST: {
      consumer.setStartFromLatest();
      break;
    }
    case EARLIEST: {
      consumer.setStartFromEarliest();
      break;
    }
    case GROUP_OFFSETS: {
      consumer.setStartFromGroupOffsets();
      break;
    }
    case TIMESTAMP: {
      consumer.setStartFromTimestamp(startTimeMs);
      break;
    }
    default: {
      throw new IllegalArgumentException("invalid startupMode.");
    }
  }
  return consumer;
}
Example #7
Source File: Kafka010SourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
  FlinkKafkaConsumer010<Row> consumer;
  if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
    Pattern pattern = Pattern.compile(topicPattern);
    consumer = new FlinkKafkaConsumer010<Row>(pattern, new MessageDeserialization(), properties);
  } else {
    consumer = new FlinkKafkaConsumer010<Row>(topic, new MessageDeserialization(), properties);
  }
  switch (super.startupMode) {
    case LATEST: {
      consumer.setStartFromLatest();
      break;
    }
    case EARLIEST: {
      consumer.setStartFromEarliest();
      break;
    }
    case GROUP_OFFSETS: {
      consumer.setStartFromGroupOffsets();
      break;
    }
    case TIMESTAMP: {
      consumer.setStartFromTimestamp(startTimeMs);
      break;
    }
    default: {
      throw new IllegalArgumentException("invalid startupMode.");
    }
  }
  return consumer;
}
Example #8
Source File: ChainingSpeed.java From flink-perf with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
  final ParameterTool pt = ParameterTool.fromArgs(args);
  see.getConfig().setGlobalJobParameters(pt);
  see.getConfig().enableObjectReuse();
  // see.setParallelism(1);

  DataStreamSource<Integer> src = see.addSource(new RichParallelSourceFunction<Integer>() {

    boolean running = true;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
      int i = 0;
      while (running) {
        ctx.collect(i++);
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  src/*.map(new MapFunction<Integer, Integer>() {
    @Override
    public Integer map(Integer s) throws Exception {
      return s;
    }
  }).*/.map(new MapFunction<Integer, Integer>() {
    @Override
    public Integer map(Integer s) throws Exception {
      return s;
    }
  }).flatMap(new RichFlatMapFunction<Integer, Integer>() {
    long received = 0;
    long logfreq = pt.getInt("logfreq");
    long lastLog = -1;
    long lastElements = 0;
    long matches = 0;
    private final Pattern threeDigitAbbr = Pattern.compile("[A-Z]{3}\\.");

    @Override
    public void open(Configuration parameters) throws Exception {
      super.open(parameters);
    }

    @Override
    public void flatMap(Integer in, Collector<Integer> collector) throws Exception {
      received++;
      if (received % logfreq == 0) {
        // throughput over entire time
        long now = System.currentTimeMillis();

        // throughput for the last "logfreq" elements
        if (lastLog == -1) {
          // init (the first)
          lastLog = now;
          lastElements = received;
        } else {
          long timeDiff = now - lastLog;
          long elementDiff = received - lastElements;
          double ex = (1000 / (double) timeDiff);
          LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core", timeDiff, elementDiff, Double.valueOf(elementDiff * ex).longValue());
          // reinit
          lastLog = now;
          lastElements = received;
        }
      }
    }
  });

  see.execute();
}
Example #9
Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
  final String topic = "auto-offset-reset-test";
  final int parallelism = 1;
  final int elementsPerPartition = 50000;

  Properties tprops = new Properties();
  tprops.setProperty("retention.ms", "250");
  kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
  env.getConfig().disableSysoutLogging();

  // ----------- add producer dataflow ----------

  DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws InterruptedException {
      int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
      int limit = cnt + elementsPerPartition;

      while (running && !stopProducer && cnt < limit) {
        ctx.collect("element-" + cnt);
        cnt++;
        Thread.sleep(10);
      }
      LOG.info("Stopping producer");
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  Properties props = new Properties();
  props.putAll(standardProps);
  props.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

  // ----------- add consumer dataflow ----------

  NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
  FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

  DataStreamSource<String> consuming = env.addSource(source);
  consuming.addSink(new DiscardingSink<String>());

  tryExecute(env, "run auto offset reset test");

  kafkaServer.deleteTestTopic(topic);
}
Example #10
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected void writeAppendSequence(
    String topicName,
    final int originalNumElements,
    final int numElementsToAppend,
    final int parallelism) throws Exception {

  LOG.info("\n===================================\n" +
      "== Appending sequence of " + numElementsToAppend + " into " + topicName +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  // -------- Write the append sequence --------

  StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());

  DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
      int cnt = originalNumElements;
      int partition = getRuntimeContext().getIndexOfThisSubtask();

      while (running && cnt < numElementsToAppend + originalNumElements) {
        ctx.collect(new Tuple2<>(partition, cnt));
        cnt++;
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  }).setParallelism(parallelism);

  // the producer must not produce duplicates
  Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
  producerProperties.setProperty("retries", "0");
  producerProperties.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
      .setParallelism(parallelism);

  try {
    writeEnv.execute("Write sequence");
  }
  catch (Exception e) {
    throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
  }

  LOG.info("Finished writing append sequence");

  // we need to validate the sequence, because kafka's producers are not exactly once
  LOG.info("Validating sequence");
  while (!getRunningJobs(client).isEmpty()) {
    Thread.sleep(50);
  }

  if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
    throw new Exception("Could not append a valid sequence to Kafka.");
  }
}
Example #11
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected String writeSequence(
    String baseTopicName,
    final int numElements,
    final int parallelism,
    final int replicationFactor) throws Exception {

  LOG.info("\n===================================\n" +
      "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final int maxNumAttempts = 10;

  for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

    final String topicName = baseTopicName + '-' + attempt;

    LOG.info("Writing attempt #" + attempt);

    // -------- Write the Sequence --------

    createTestTopic(topicName, parallelism, replicationFactor);

    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

      private boolean running = true;

      @Override
      public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
        int cnt = 0;
        int partition = getRuntimeContext().getIndexOfThisSubtask();

        while (running && cnt < numElements) {
          ctx.collect(new Tuple2<>(partition, cnt));
          cnt++;
        }
      }

      @Override
      public void cancel() {
        running = false;
      }
    }).setParallelism(parallelism);

    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
        .setParallelism(parallelism);

    try {
      writeEnv.execute("Write sequence");
    }
    catch (Exception e) {
      LOG.error("Write attempt failed, trying again", e);
      deleteTestTopic(topicName);
      waitUntilNoJobIsRunning(client);
      continue;
    }

    LOG.info("Finished writing sequence");

    // -------- Validate the Sequence --------

    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");

    waitUntilNoJobIsRunning(client);

    if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
      // everything is good!
      return topicName;
    }
    else {
      deleteTestTopic(topicName);
      // fall through the loop
    }
  }

  throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
Example #12
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
  final String topic = "auto-offset-reset-test";
  final int parallelism = 1;
  final int elementsPerPartition = 50000;

  Properties tprops = new Properties();
  tprops.setProperty("retention.ms", "250");
  kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

  // ----------- add producer dataflow ----------

  DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws InterruptedException {
      int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
      int limit = cnt + elementsPerPartition;

      while (running && !stopProducer && cnt < limit) {
        ctx.collect("element-" + cnt);
        cnt++;
        Thread.sleep(10);
      }
      LOG.info("Stopping producer");
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  Properties props = new Properties();
  props.putAll(standardProps);
  props.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

  // ----------- add consumer dataflow ----------

  NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
  FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

  DataStreamSource<String> consuming = env.addSource(source);
  consuming.addSink(new DiscardingSink<String>());

  tryExecute(env, "run auto offset reset test");

  kafkaServer.deleteTestTopic(topic);
}
Example #13
Source File: StateMain.java From flink-learning with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.enableCheckpointing(10000);
  env.setStateBackend(new MemoryStateBackend());

  env.addSource(new RichParallelSourceFunction<Tuple2<String, Long>>() {
    @Override
    public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
      while (true) {
        sourceContext.collect(new Tuple2<>(String.valueOf(System.currentTimeMillis()), System.currentTimeMillis()));
        Thread.sleep(10);
      }
    }

    @Override
    public void cancel() {
    }
  }).keyBy(0)
      .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
        private ValueState<Long> state;

        @Override
        public void open(Configuration parameters) throws Exception {
          super.open(parameters);
          state = getRuntimeContext().getState(
              new ValueStateDescriptor<>("uvState", TypeInformation.of(new TypeHint<Long>() {
              })));
        }

        @Override
        public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
          state.update(tuple2.f1);
          return tuple2;
        }
      }).print();

  env.execute();
}
Example #14
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected void writeAppendSequence(
    String topicName,
    final int originalNumElements,
    final int numElementsToAppend,
    final int parallelism) throws Exception {

  LOG.info("\n===================================\n" +
      "== Appending sequence of " + numElementsToAppend + " into " + topicName +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  // -------- Write the append sequence --------

  StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
  writeEnv.getConfig().disableSysoutLogging();

  DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
      int cnt = originalNumElements;
      int partition = getRuntimeContext().getIndexOfThisSubtask();

      while (running && cnt < numElementsToAppend + originalNumElements) {
        ctx.collect(new Tuple2<>(partition, cnt));
        cnt++;
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  }).setParallelism(parallelism);

  // the producer must not produce duplicates
  Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
  producerProperties.setProperty("retries", "0");
  producerProperties.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
      .setParallelism(parallelism);

  try {
    writeEnv.execute("Write sequence");
  }
  catch (Exception e) {
    throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
  }

  LOG.info("Finished writing append sequence");

  // we need to validate the sequence, because kafka's producers are not exactly once
  LOG.info("Validating sequence");
  while (!getRunningJobs(client).isEmpty()) {
    Thread.sleep(50);
  }

  if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
    throw new Exception("Could not append a valid sequence to Kafka.");
  }
}
Example #15
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected String writeSequence(
    String baseTopicName,
    final int numElements,
    final int parallelism,
    final int replicationFactor) throws Exception {

  LOG.info("\n===================================\n" +
      "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final int maxNumAttempts = 10;

  for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

    final String topicName = baseTopicName + '-' + attempt;

    LOG.info("Writing attempt #" + attempt);

    // -------- Write the Sequence --------

    createTestTopic(topicName, parallelism, replicationFactor);

    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    writeEnv.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

      private boolean running = true;

      @Override
      public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
        int cnt = 0;
        int partition = getRuntimeContext().getIndexOfThisSubtask();

        while (running && cnt < numElements) {
          ctx.collect(new Tuple2<>(partition, cnt));
          cnt++;
        }
      }

      @Override
      public void cancel() {
        running = false;
      }
    }).setParallelism(parallelism);

    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
        .setParallelism(parallelism);

    try {
      writeEnv.execute("Write sequence");
    }
    catch (Exception e) {
      LOG.error("Write attempt failed, trying again", e);
      deleteTestTopic(topicName);
      waitUntilNoJobIsRunning(client);
      continue;
    }

    LOG.info("Finished writing sequence");

    // -------- Validate the Sequence --------

    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");

    waitUntilNoJobIsRunning(client);

    if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
      // everything is good!
      return topicName;
    }
    else {
      deleteTestTopic(topicName);
      // fall through the loop
    }
  }

  throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
Example #16
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
  final String topic = "auto-offset-reset-test";
  final int parallelism = 1;
  final int elementsPerPartition = 50000;

  Properties tprops = new Properties();
  tprops.setProperty("retention.ms", "250");
  kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
  env.getConfig().disableSysoutLogging();

  // ----------- add producer dataflow ----------

  DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws InterruptedException {
      int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
      int limit = cnt + elementsPerPartition;

      while (running && !stopProducer && cnt < limit) {
        ctx.collect("element-" + cnt);
        cnt++;
        Thread.sleep(10);
      }
      LOG.info("Stopping producer");
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  Properties props = new Properties();
  props.putAll(standardProps);
  props.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

  // ----------- add consumer dataflow ----------

  NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
  FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

  DataStreamSource<String> consuming = env.addSource(source);
  consuming.addSink(new DiscardingSink<String>());

  tryExecute(env, "run auto offset reset test");

  kafkaServer.deleteTestTopic(topic);
}
Example #17
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
protected void writeAppendSequence(
    String topicName,
    final int originalNumElements,
    final int numElementsToAppend,
    final int parallelism) throws Exception {

  LOG.info("\n===================================\n" +
      "== Appending sequence of " + numElementsToAppend + " into " + topicName +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  // -------- Write the append sequence --------

  StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
  writeEnv.getConfig().disableSysoutLogging();

  DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
      int cnt = originalNumElements;
      int partition = getRuntimeContext().getIndexOfThisSubtask();

      while (running && cnt < numElementsToAppend + originalNumElements) {
        ctx.collect(new Tuple2<>(partition, cnt));
        cnt++;
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  }).setParallelism(parallelism);

  // the producer must not produce duplicates
  Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
  producerProperties.setProperty("retries", "0");
  producerProperties.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
      .setParallelism(parallelism);

  try {
    writeEnv.execute("Write sequence");
  }
  catch (Exception e) {
    throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
  }

  LOG.info("Finished writing append sequence");

  // we need to validate the sequence, because kafka's producers are not exactly once
  LOG.info("Validating sequence");
  while (!getRunningJobs(client).isEmpty()) {
    Thread.sleep(50);
  }

  if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
    throw new Exception("Could not append a valid sequence to Kafka.");
  }
}
Example #18
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
protected String writeSequence(
    String baseTopicName,
    final int numElements,
    final int parallelism,
    final int replicationFactor) throws Exception {

  LOG.info("\n===================================\n" +
      "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final int maxNumAttempts = 10;

  for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

    final String topicName = baseTopicName + '-' + attempt;

    LOG.info("Writing attempt #" + attempt);

    // -------- Write the Sequence --------

    createTestTopic(topicName, parallelism, replicationFactor);

    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    writeEnv.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

      private boolean running = true;

      @Override
      public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
        int cnt = 0;
        int partition = getRuntimeContext().getIndexOfThisSubtask();

        while (running && cnt < numElements) {
          ctx.collect(new Tuple2<>(partition, cnt));
          cnt++;
        }
      }

      @Override
      public void cancel() {
        running = false;
      }
    }).setParallelism(parallelism);

    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
        .setParallelism(parallelism);

    try {
      writeEnv.execute("Write sequence");
    }
    catch (Exception e) {
      LOG.error("Write attempt failed, trying again", e);
      deleteTestTopic(topicName);
      waitUntilNoJobIsRunning(client);
      continue;
    }

    LOG.info("Finished writing sequence");

    // -------- Validate the Sequence --------

    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");

    waitUntilNoJobIsRunning(client);

    if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
      // everything is good!
      return topicName;
    }
    else {
      deleteTestTopic(topicName);
      // fall through the loop
    }
  }

  throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
Example #19
Source File: BaseKafkaSourceBuilder.java From Alink with Apache License 2.0 | 2 votes |
/**
 * Construct the {@link RichParallelSourceFunction} for a specific version of Kafka.
 */
public abstract RichParallelSourceFunction<Row> build();
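As a usage note, here is a hypothetical sketch (not code from Alink) of how the result of build() is typically attached to a job: whichever version-specific builder is chosen, the returned value is an ordinary RichParallelSourceFunction<Row>, so wiring it in looks the same regardless of the Kafka client version. The builder variable and its configuration below are assumptions; only build(), Row, and addSource() appear in the examples above.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.types.Row;

public class KafkaSourceUsageSketch {

  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Hypothetical: how the version-specific builder (e.g. KafkaSourceBuilder from
    // Example #6) is instantiated and configured is not shown in this listing.
    BaseKafkaSourceBuilder builder = createConfiguredBuilder();

    // build() hides which FlinkKafkaConsumer variant is actually instantiated.
    RichParallelSourceFunction<Row> source = builder.build();

    DataStream<Row> rows = env.addSource(source, "Kafka");
    rows.print();
    env.execute("Kafka source usage sketch");
  }

  // Placeholder for obtaining a fully configured, version-specific builder.
  private static BaseKafkaSourceBuilder createConfiguredBuilder() {
    throw new UnsupportedOperationException("supply a configured builder here");
  }
}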