org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction Java Examples
The following examples show how to use
org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction.
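Before the examples, here is a minimal, self-contained sketch (the class name, record type, and parallelism are illustrative assumptions, not taken from any project below) of how a RichParallelSourceFunction is typically implemented: each parallel subtask executes run() until cancel() flips a volatile flag, and getRuntimeContext() exposes the subtask index.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

// Illustrative sketch only; names and emitted records are assumptions.
public class CountingParallelSource extends RichParallelSourceFunction<Tuple2<Integer, Long>> {

  // volatile, because cancel() is invoked from a different thread than run()
  private volatile boolean running = true;

  @Override
  public void run(SourceContext<Tuple2<Integer, Long>> ctx) throws Exception {
    // each parallel subtask tags its records with its own subtask index
    int subtask = getRuntimeContext().getIndexOfThisSubtask();
    long counter = 0;
    while (running) {
      // hold the checkpoint lock while emitting so checkpoints see a consistent state
      synchronized (ctx.getCheckpointLock()) {
        ctx.collect(new Tuple2<>(subtask, counter++));
      }
      Thread.sleep(10);
    }
  }

  @Override
  public void cancel() {
    running = false;
  }

  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Integer, Long>> stream =
        env.addSource(new CountingParallelSource(), "CountingParallelSource").setParallelism(2);
    stream.print();
    env.execute("RichParallelSourceFunction sketch");
  }
}

The anonymous-class sources in the test examples below follow the same run()/cancel() contract, just inlined at the addSource() call site.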
Example #1
Source File: AdvertisingTopologyFlinkWindows.java From yahoo-streaming-benchmark with Apache License 2.0 | 6 votes |
/**
 * Choose source - either Kafka or data generator
 */
private static DataStream<String> streamSource(BenchmarkConfig config, StreamExecutionEnvironment env) {
  // Choose a source -- either local generator or Kafka
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    EventGeneratorSource eventGenerator = new EventGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
    Map<String, List<String>> campaigns = eventGenerator.getCampaigns();
    RedisHelper redisHelper = new RedisHelper(config);
    redisHelper.prepareRedis(campaigns);
    redisHelper.writeCampaignFile(campaigns);
  } else {
    source = kafkaSource(config);
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #2
Source File: AdvertisingTopologyFlinkState.java From yahoo-streaming-benchmark with Apache License 2.0 | 6 votes |
/**
 * Choose source - either Kafka or data generator
 */
private static DataStream<String> sourceStream(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    EventGeneratorSource eventGenerator = new EventGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
    prepareRedis(config, eventGenerator);
  } else {
    source = kafkaSource(config);
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #3
Source File: AdvertisingTopologyRedisDirect.java From yahoo-streaming-benchmark with Apache License 2.0 | 5 votes |
/**
 * Choose either Kafka or data generator as source
 */
private static DataStream<String> sourceStream(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    HighKeyCardinalityGeneratorSource eventGenerator = new HighKeyCardinalityGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
  } else {
    source = new FlinkKafkaConsumer082<>(config.kafkaTopic, new SimpleStringSchema(), config.getParameters().getProperties());
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #4
Source File: Kafka011SourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
  FlinkKafkaConsumer011<Row> consumer;
  if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
    Pattern pattern = Pattern.compile(topicPattern);
    consumer = new FlinkKafkaConsumer011<Row>(pattern, new MessageDeserialization(), properties);
  } else {
    consumer = new FlinkKafkaConsumer011<Row>(topic, new MessageDeserialization(), properties);
  }
  switch (super.startupMode) {
    case LATEST: {
      consumer.setStartFromLatest();
      break;
    }
    case EARLIEST: {
      consumer.setStartFromEarliest();
      break;
    }
    case GROUP_OFFSETS: {
      consumer.setStartFromGroupOffsets();
      break;
    }
    case TIMESTAMP: {
      consumer.setStartFromTimestamp(startTimeMs);
      break;
    }
    default: {
      throw new IllegalArgumentException("invalid startupMode.");
    }
  }
  return consumer;
}
Example #5
Source File: AdvertisingTopologyFlinkStateHighKeyCard.java From yahoo-streaming-benchmark with Apache License 2.0 | 5 votes |
/**
 * Choose data source, either Kafka or data generator
 */
private static DataStream<String> streamSource(BenchmarkConfig config, StreamExecutionEnvironment env) {
  RichParallelSourceFunction<String> source;
  String sourceName;
  if (config.useLocalEventGenerator) {
    HighKeyCardinalityGeneratorSource eventGenerator = new HighKeyCardinalityGeneratorSource(config);
    source = eventGenerator;
    sourceName = "EventGenerator";
  } else {
    source = kafkaSource(config);
    sourceName = "Kafka";
  }
  return env.addSource(source, sourceName);
}
Example #6
Source File: KafkaSourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
  FlinkKafkaConsumer<Row> consumer;
  if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
    Pattern pattern = Pattern.compile(topicPattern);
    consumer = new FlinkKafkaConsumer<Row>(pattern, new MessageDeserialization(), properties);
  } else {
    consumer = new FlinkKafkaConsumer<Row>(topic, new MessageDeserialization(), properties);
  }
  switch (super.startupMode) {
    case LATEST: {
      consumer.setStartFromLatest();
      break;
    }
    case EARLIEST: {
      consumer.setStartFromEarliest();
      break;
    }
    case GROUP_OFFSETS: {
      consumer.setStartFromGroupOffsets();
      break;
    }
    case TIMESTAMP: {
      consumer.setStartFromTimestamp(startTimeMs);
      break;
    }
    default: {
      throw new IllegalArgumentException("invalid startupMode.");
    }
  }
  return consumer;
}
Example #7
Source File: Kafka010SourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
  FlinkKafkaConsumer010<Row> consumer;
  if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
    Pattern pattern = Pattern.compile(topicPattern);
    consumer = new FlinkKafkaConsumer010<Row>(pattern, new MessageDeserialization(), properties);
  } else {
    consumer = new FlinkKafkaConsumer010<Row>(topic, new MessageDeserialization(), properties);
  }
  switch (super.startupMode) {
    case LATEST: {
      consumer.setStartFromLatest();
      break;
    }
    case EARLIEST: {
      consumer.setStartFromEarliest();
      break;
    }
    case GROUP_OFFSETS: {
      consumer.setStartFromGroupOffsets();
      break;
    }
    case TIMESTAMP: {
      consumer.setStartFromTimestamp(startTimeMs);
      break;
    }
    default: {
      throw new IllegalArgumentException("invalid startupMode.");
    }
  }
  return consumer;
}
Example #8
Source File: ChainingSpeed.java From flink-perf with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
  final ParameterTool pt = ParameterTool.fromArgs(args);
  see.getConfig().setGlobalJobParameters(pt);
  see.getConfig().enableObjectReuse();
  // see.setParallelism(1);

  DataStreamSource<Integer> src = see.addSource(new RichParallelSourceFunction<Integer>() {

    boolean running = true;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
      int i = 0;
      while (running) {
        ctx.collect(i++);
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  src/*.map(new MapFunction<Integer, Integer>() {
    @Override
    public Integer map(Integer s) throws Exception {
      return s;
    }
  }).*/.map(new MapFunction<Integer, Integer>() {
    @Override
    public Integer map(Integer s) throws Exception {
      return s;
    }
  }).flatMap(new RichFlatMapFunction<Integer, Integer>() {
    long received = 0;
    long logfreq = pt.getInt("logfreq");
    long lastLog = -1;
    long lastElements = 0;
    long matches = 0;
    private final Pattern threeDigitAbbr = Pattern.compile("[A-Z]{3}\\.");

    @Override
    public void open(Configuration parameters) throws Exception {
      super.open(parameters);
    }

    @Override
    public void flatMap(Integer in, Collector<Integer> collector) throws Exception {
      received++;
      if (received % logfreq == 0) {
        // throughput over entire time
        long now = System.currentTimeMillis();

        // throughput for the last "logfreq" elements
        if (lastLog == -1) {
          // init (the first)
          lastLog = now;
          lastElements = received;
        } else {
          long timeDiff = now - lastLog;
          long elementDiff = received - lastElements;
          double ex = (1000 / (double) timeDiff);
          LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core", timeDiff, elementDiff, Double.valueOf(elementDiff * ex).longValue());
          // reinit
          lastLog = now;
          lastElements = received;
        }
      }
    }
  });

  see.execute();
}
Example #9
Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
  final String topic = "auto-offset-reset-test";
  final int parallelism = 1;
  final int elementsPerPartition = 50000;

  Properties tprops = new Properties();
  tprops.setProperty("retention.ms", "250");
  kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
  env.getConfig().disableSysoutLogging();

  // ----------- add producer dataflow ----------

  DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws InterruptedException {
      int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
      int limit = cnt + elementsPerPartition;

      while (running && !stopProducer && cnt < limit) {
        ctx.collect("element-" + cnt);
        cnt++;
        Thread.sleep(10);
      }
      LOG.info("Stopping producer");
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  Properties props = new Properties();
  props.putAll(standardProps);
  props.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

  // ----------- add consumer dataflow ----------

  NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
  FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

  DataStreamSource<String> consuming = env.addSource(source);
  consuming.addSink(new DiscardingSink<String>());

  tryExecute(env, "run auto offset reset test");

  kafkaServer.deleteTestTopic(topic);
}
Example #10
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected void writeAppendSequence(
    String topicName,
    final int originalNumElements,
    final int numElementsToAppend,
    final int parallelism) throws Exception {

  LOG.info("\n===================================\n" +
      "== Appending sequence of " + numElementsToAppend + " into " + topicName +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  // -------- Write the append sequence --------

  StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());

  DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
      int cnt = originalNumElements;
      int partition = getRuntimeContext().getIndexOfThisSubtask();

      while (running && cnt < numElementsToAppend + originalNumElements) {
        ctx.collect(new Tuple2<>(partition, cnt));
        cnt++;
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  }).setParallelism(parallelism);

  // the producer must not produce duplicates
  Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
  producerProperties.setProperty("retries", "0");
  producerProperties.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
      .setParallelism(parallelism);

  try {
    writeEnv.execute("Write sequence");
  }
  catch (Exception e) {
    throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
  }

  LOG.info("Finished writing append sequence");

  // we need to validate the sequence, because kafka's producers are not exactly once
  LOG.info("Validating sequence");
  while (!getRunningJobs(client).isEmpty()) {
    Thread.sleep(50);
  }

  if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
    throw new Exception("Could not append a valid sequence to Kafka.");
  }
}
Example #11
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected String writeSequence(
    String baseTopicName,
    final int numElements,
    final int parallelism,
    final int replicationFactor) throws Exception {

  LOG.info("\n===================================\n" +
      "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final SerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final int maxNumAttempts = 10;

  for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

    final String topicName = baseTopicName + '-' + attempt;

    LOG.info("Writing attempt #" + attempt);

    // -------- Write the Sequence --------

    createTestTopic(topicName, parallelism, replicationFactor);

    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

      private boolean running = true;

      @Override
      public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
        int cnt = 0;
        int partition = getRuntimeContext().getIndexOfThisSubtask();

        while (running && cnt < numElements) {
          ctx.collect(new Tuple2<>(partition, cnt));
          cnt++;
        }
      }

      @Override
      public void cancel() {
        running = false;
      }
    }).setParallelism(parallelism);

    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
        .setParallelism(parallelism);

    try {
      writeEnv.execute("Write sequence");
    }
    catch (Exception e) {
      LOG.error("Write attempt failed, trying again", e);
      deleteTestTopic(topicName);
      waitUntilNoJobIsRunning(client);
      continue;
    }

    LOG.info("Finished writing sequence");

    // -------- Validate the Sequence --------

    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");

    waitUntilNoJobIsRunning(client);

    if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
      // everything is good!
      return topicName;
    }
    else {
      deleteTestTopic(topicName);
      // fall through the loop
    }
  }

  throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
Example #12
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
  final String topic = "auto-offset-reset-test";
  final int parallelism = 1;
  final int elementsPerPartition = 50000;

  Properties tprops = new Properties();
  tprops.setProperty("retention.ms", "250");
  kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

  // ----------- add producer dataflow ----------

  DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws InterruptedException {
      int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
      int limit = cnt + elementsPerPartition;

      while (running && !stopProducer && cnt < limit) {
        ctx.collect("element-" + cnt);
        cnt++;
        Thread.sleep(10);
      }
      LOG.info("Stopping producer");
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  Properties props = new Properties();
  props.putAll(standardProps);
  props.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

  // ----------- add consumer dataflow ----------

  NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
  FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

  DataStreamSource<String> consuming = env.addSource(source);
  consuming.addSink(new DiscardingSink<String>());

  tryExecute(env, "run auto offset reset test");

  kafkaServer.deleteTestTopic(topic);
}
Example #13
Source File: StateMain.java From flink-learning with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.enableCheckpointing(10000);
  env.setStateBackend(new MemoryStateBackend());

  env.addSource(new RichParallelSourceFunction<Tuple2<String, Long>>() {
    @Override
    public void run(SourceContext<Tuple2<String, Long>> sourceContext) throws Exception {
      while (true) {
        sourceContext.collect(new Tuple2<>(String.valueOf(System.currentTimeMillis()), System.currentTimeMillis()));
        Thread.sleep(10);
      }
    }

    @Override
    public void cancel() {
    }
  }).keyBy(0)
      .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
        private ValueState<Long> state;

        @Override
        public void open(Configuration parameters) throws Exception {
          super.open(parameters);
          state = getRuntimeContext().getState(
              new ValueStateDescriptor<>("uvState", TypeInformation.of(new TypeHint<Long>() {
              })));
        }

        @Override
        public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
          state.update(tuple2.f1);
          return tuple2;
        }
      }).print();

  env.execute();
}
Example #14
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected void writeAppendSequence(
    String topicName,
    final int originalNumElements,
    final int numElementsToAppend,
    final int parallelism) throws Exception {

  LOG.info("\n===================================\n" +
      "== Appending sequence of " + numElementsToAppend + " into " + topicName +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  // -------- Write the append sequence --------

  StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
  writeEnv.getConfig().disableSysoutLogging();

  DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
      int cnt = originalNumElements;
      int partition = getRuntimeContext().getIndexOfThisSubtask();

      while (running && cnt < numElementsToAppend + originalNumElements) {
        ctx.collect(new Tuple2<>(partition, cnt));
        cnt++;
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  }).setParallelism(parallelism);

  // the producer must not produce duplicates
  Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
  producerProperties.setProperty("retries", "0");
  producerProperties.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
      .setParallelism(parallelism);

  try {
    writeEnv.execute("Write sequence");
  }
  catch (Exception e) {
    throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
  }

  LOG.info("Finished writing append sequence");

  // we need to validate the sequence, because kafka's producers are not exactly once
  LOG.info("Validating sequence");
  while (!getRunningJobs(client).isEmpty()) {
    Thread.sleep(50);
  }

  if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
    throw new Exception("Could not append a valid sequence to Kafka.");
  }
}
Example #15
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
protected String writeSequence(
    String baseTopicName,
    final int numElements,
    final int parallelism,
    final int replicationFactor) throws Exception {

  LOG.info("\n===================================\n" +
      "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final int maxNumAttempts = 10;

  for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

    final String topicName = baseTopicName + '-' + attempt;

    LOG.info("Writing attempt #" + attempt);

    // -------- Write the Sequence --------

    createTestTopic(topicName, parallelism, replicationFactor);

    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    writeEnv.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

      private boolean running = true;

      @Override
      public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
        int cnt = 0;
        int partition = getRuntimeContext().getIndexOfThisSubtask();

        while (running && cnt < numElements) {
          ctx.collect(new Tuple2<>(partition, cnt));
          cnt++;
        }
      }

      @Override
      public void cancel() {
        running = false;
      }
    }).setParallelism(parallelism);

    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
        .setParallelism(parallelism);

    try {
      writeEnv.execute("Write sequence");
    }
    catch (Exception e) {
      LOG.error("Write attempt failed, trying again", e);
      deleteTestTopic(topicName);
      waitUntilNoJobIsRunning(client);
      continue;
    }

    LOG.info("Finished writing sequence");

    // -------- Validate the Sequence --------

    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");

    waitUntilNoJobIsRunning(client);

    if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
      // everything is good!
      return topicName;
    }
    else {
      deleteTestTopic(topicName);
      // fall through the loop
    }
  }

  throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
Example #16
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
  final String topic = "auto-offset-reset-test";
  final int parallelism = 1;
  final int elementsPerPartition = 50000;

  Properties tprops = new Properties();
  tprops.setProperty("retention.ms", "250");
  kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
  env.getConfig().disableSysoutLogging();

  // ----------- add producer dataflow ----------

  DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws InterruptedException {
      int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
      int limit = cnt + elementsPerPartition;

      while (running && !stopProducer && cnt < limit) {
        ctx.collect("element-" + cnt);
        cnt++;
        Thread.sleep(10);
      }
      LOG.info("Stopping producer");
    }

    @Override
    public void cancel() {
      running = false;
    }
  });

  Properties props = new Properties();
  props.putAll(standardProps);
  props.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

  // ----------- add consumer dataflow ----------

  NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
  FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

  DataStreamSource<String> consuming = env.addSource(source);
  consuming.addSink(new DiscardingSink<String>());

  tryExecute(env, "run auto offset reset test");

  kafkaServer.deleteTestTopic(topic);
}
Example #17
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
protected void writeAppendSequence(
    String topicName,
    final int originalNumElements,
    final int numElementsToAppend,
    final int parallelism) throws Exception {

  LOG.info("\n===================================\n" +
      "== Appending sequence of " + numElementsToAppend + " into " + topicName +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  // -------- Write the append sequence --------

  StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
  writeEnv.getConfig().disableSysoutLogging();

  DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

    private boolean running = true;

    @Override
    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
      int cnt = originalNumElements;
      int partition = getRuntimeContext().getIndexOfThisSubtask();

      while (running && cnt < numElementsToAppend + originalNumElements) {
        ctx.collect(new Tuple2<>(partition, cnt));
        cnt++;
      }
    }

    @Override
    public void cancel() {
      running = false;
    }
  }).setParallelism(parallelism);

  // the producer must not produce duplicates
  Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
  producerProperties.setProperty("retries", "0");
  producerProperties.putAll(secureProps);

  kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
      .setParallelism(parallelism);

  try {
    writeEnv.execute("Write sequence");
  }
  catch (Exception e) {
    throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
  }

  LOG.info("Finished writing append sequence");

  // we need to validate the sequence, because kafka's producers are not exactly once
  LOG.info("Validating sequence");
  while (!getRunningJobs(client).isEmpty()) {
    Thread.sleep(50);
  }

  if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
    throw new Exception("Could not append a valid sequence to Kafka.");
  }
}
Example #18
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
protected String writeSequence(
    String baseTopicName,
    final int numElements,
    final int parallelism,
    final int replicationFactor) throws Exception {

  LOG.info("\n===================================\n" +
      "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" +
      "===================================");

  final TypeInformation<Tuple2<Integer, Integer>> resultType =
      TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

  final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
      new KeyedSerializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
      new KafkaDeserializationSchemaWrapper<>(
          new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

  final int maxNumAttempts = 10;

  for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {

    final String topicName = baseTopicName + '-' + attempt;

    LOG.info("Writing attempt #" + attempt);

    // -------- Write the Sequence --------

    createTestTopic(topicName, parallelism, replicationFactor);

    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    writeEnv.getConfig().disableSysoutLogging();

    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

      private boolean running = true;

      @Override
      public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
        int cnt = 0;
        int partition = getRuntimeContext().getIndexOfThisSubtask();

        while (running && cnt < numElements) {
          ctx.collect(new Tuple2<>(partition, cnt));
          cnt++;
        }
      }

      @Override
      public void cancel() {
        running = false;
      }
    }).setParallelism(parallelism);

    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism))
        .setParallelism(parallelism);

    try {
      writeEnv.execute("Write sequence");
    }
    catch (Exception e) {
      LOG.error("Write attempt failed, trying again", e);
      deleteTestTopic(topicName);
      waitUntilNoJobIsRunning(client);
      continue;
    }

    LOG.info("Finished writing sequence");

    // -------- Validate the Sequence --------

    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");

    waitUntilNoJobIsRunning(client);

    if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
      // everything is good!
      return topicName;
    }
    else {
      deleteTestTopic(topicName);
      // fall through the loop
    }
  }

  throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
Example #19
Source File: BaseKafkaSourceBuilder.java From Alink with Apache License 2.0 | 2 votes |
/**
 * Construct the {@link RichParallelSourceFunction} for a specific version of Kafka.
 */
public abstract RichParallelSourceFunction<Row> build();
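As a usage note, here is a hypothetical sketch (not code from Alink) of how the result of build() is typically attached to a job: whichever version-specific builder is chosen, the returned value is an ordinary RichParallelSourceFunction<Row>, so wiring it in looks the same regardless of the Kafka client version. The builder variable and its configuration below are assumptions; only build(), Row, and addSource() appear in the examples above.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.types.Row;

public class KafkaSourceUsageSketch {

  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Hypothetical: how the version-specific builder (e.g. KafkaSourceBuilder from
    // Example #6) is instantiated and configured is not shown in this listing.
    BaseKafkaSourceBuilder builder = createConfiguredBuilder();

    // build() hides which FlinkKafkaConsumer variant is actually instantiated.
    RichParallelSourceFunction<Row> source = builder.build();

    DataStream<Row> rows = env.addSource(source, "Kafka");
    rows.print();
    env.execute("Kafka source usage sketch");
  }

  // Placeholder for obtaining a fully configured, version-specific builder.
  private static BaseKafkaSourceBuilder createConfiguredBuilder() {
    throw new UnsupportedOperationException("supply a configured builder here");
  }
}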