org.apache.flink.api.common.serialization.SimpleStringSchema Java Examples
The following examples show how to use
org.apache.flink.api.common.serialization.SimpleStringSchema.
Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
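Before the examples, it may help to recall what SimpleStringSchema itself does: it implements both DeserializationSchema<String> and SerializationSchema<String>, converting between Java Strings and raw bytes (UTF-8 by default), which is why a single instance can be handed to the sources and sinks in the examples below. The following standalone sketch is illustrative only (the class name SimpleStringSchemaSketch is not from any of the projects above) and shows the round trip.

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaSketch {

    public static void main(String[] args) throws Exception {
        // The no-arg constructor uses UTF-8; a Charset can also be passed explicitly.
        SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);

        // SerializationSchema side: String -> byte[] (used by producers/sinks).
        byte[] bytes = schema.serialize("hello flink");

        // DeserializationSchema side: byte[] -> String (used by consumers/sources).
        String text = schema.deserialize(bytes);

        // The schema never signals end-of-stream and produces String type information.
        System.out.println(text + " / endOfStream=" + schema.isEndOfStream(text)
                + " / type=" + schema.getProducedType());
    }
}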
Example #1
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;

    // These hard-coded parameters could be placed in a configuration file and read through parameterTool
    final RMQConnectionConfig connectionConfig = new RMQConnectionConfig
            .Builder().setHost("localhost").setVirtualHost("/")
            .setPort(5672).setUserName("admin").setPassword("admin")
            .build();
    DataStreamSource<String> zhisheng = env.addSource(new RMQSource<>(connectionConfig,
            "zhisheng",
            true,
            new SimpleStringSchema()))
            .setParallelism(1);
    zhisheng.print();

    // To guarantee exactly-once or at-least-once, checkpointing must be enabled
    // env.enableCheckpointing(10000);

    env.execute("flink learning connectors rabbitmq");
}
Example #2
Source File: FlinkPulsarITest.java From pulsar-flink with Apache License 2.0 | 6 votes |
@Test
public void testRunFailedOnWrongServiceUrl() {
    try {
        Properties props = MapUtils.toProperties(Collections.singletonMap(TOPIC_SINGLE_OPTION_KEY, "tp"));

        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.getConfig().disableSysoutLogging();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);

        FlinkPulsarSource<String> source =
                new FlinkPulsarSource<String>("sev", "admin", new SimpleStringSchema(), props)
                        .setStartFromEarliest();

        DataStream<String> stream = see.addSource(source);
        stream.print();
        see.execute("wrong service url");
    } catch (Exception e) {
        final Optional<Throwable> optionalThrowable =
                ExceptionUtils.findThrowableWithMessage(e, "authority component is missing");
        assertTrue(optionalThrowable.isPresent());
        assertTrue(optionalThrowable.get() instanceof PulsarClientException);
    }
}
Example #3
Source File: ConsumeFromKinesis.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
        "flink-test",
        new SimpleStringSchema(),
        kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example #4
Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public void runFailOnAutoOffsetResetNoneEager() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    // ----------- add consumer ----------

    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception

    try {
        kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
        fail("should fail with an exception");
    } catch (IllegalArgumentException e) {
        // expected
        assertTrue(e.getMessage().contains("none"));
    }

    kafkaServer.deleteTestTopic(topic);
}
Example #5
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
Example #6
Source File: KinesisConsumerMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    DataStream<String> kinesis = env.addSource(new FlinkKinesisConsumer<>(
            "zhisheng",
            new SimpleStringSchema(),
            kinesisConsumerConfig));
    kinesis.print();

    env.execute();
}
Example #7
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled,
 * the snapshot method does indeed finish without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
    producer.setFlushOnCheckpoint(false);

    final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

    final OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg"));

    // make sure that all callbacks have not been completed
    verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

    // should return even if there are pending records
    testHarness.snapshot(123L, 123L);

    testHarness.close();
}
Example #8
Source File: ConsumeFromKinesis.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
        "flink-test",
        new SimpleStringSchema(),
        kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example #9
Source File: EmulatedPubSubSinkTest.java From flink with Apache License 2.0 | 6 votes |
@Test(expected = Exception.class)
public void testPubSubSinkThrowsExceptionOnFailure() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // Create test stream
    // use source function to prevent the job from shutting down before a checkpoint has been made
    env.addSource(new SingleInputSourceFunction())
        .map((MapFunction<String, String>) StringUtils::reverse)
        .addSink(PubSubSink.newBuilder()
            .withSerializationSchema(new SimpleStringSchema())
            .withProjectName(PROJECT_NAME)
            .withTopicName(TOPIC_NAME)
            // Specific for emulator
            .withHostAndPortForEmulator("unknown-host-to-force-sink-crash:1234")
            .withCredentials(NoCredentials.getInstance())
            .build()).name("PubSub sink");

    // Run
    env.execute();
}
Example #10
Source File: ConsumeFromDynamoDBStreams.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties dynamodbStreamsConsumerConfig = new Properties();
    final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
            streamName,
            new SimpleStringSchema(),
            dynamodbStreamsConsumerConfig));

    dynamodbStreams.print();

    see.execute();
}
Example #11
Source File: KafkaConsumer08Test.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testCreateSourceWithoutCluster() {
    try {
        Properties props = new Properties();
        props.setProperty("zookeeper.connect", "localhost:56794");
        props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
        props.setProperty("group.id", "non-existent-group");
        props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

        FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
            Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
        StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
        Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
        consumer.setRuntimeContext(mockRuntimeContext);

        consumer.open(new Configuration());

        fail();
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
    }
}
Example #12
Source File: KafkaConsumer08Test.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test
public void testCreateSourceWithoutCluster() {
    try {
        Properties props = new Properties();
        props.setProperty("zookeeper.connect", "localhost:56794");
        props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
        props.setProperty("group.id", "non-existent-group");
        props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

        FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
            Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
        StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
        Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
        consumer.setRuntimeContext(mockRuntimeContext);

        consumer.open(new Configuration());

        fail();
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
    }
}
Example #13
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests that the constructor defaults missing key/value serializers in the config to ByteArraySerializer.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
    Properties props = new Properties();
    props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
    // should set missing key/value serializers
    new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
    assertTrue(props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
Example #14
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests that partitions list is determinate and correctly provided to custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
    FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

    RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
    when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
    when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

    // out-of-order list of 4 partitions
    List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
    producer.setRuntimeContext(mockRuntimeContext);

    final KafkaProducer mockProducer = producer.getMockKafkaProducer();
    when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
    when(mockProducer.metrics()).thenReturn(null);

    producer.open(new Configuration());
    verify(mockPartitioner, times(1)).open(0, 1);

    producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
    verify(mockPartitioner, times(1)).partition(
        "foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
Example #15
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    // let the message request return an async exception
    producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

    try {
        testHarness.processElement(new StreamRecord<>("msg-2"));
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #16
Source File: FlinkKinesisProducerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
    final DummyFlinkKinesisProducer<String> producer = new DummyFlinkKinesisProducer<>(new SimpleStringSchema());

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    producer.getPendingRecordFutures().get(0).setException(new Exception("artificial async exception"));

    try {
        testHarness.processElement(new StreamRecord<>("msg-2"));
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(ExceptionUtils.findThrowableWithMessage(e, "artificial async exception").isPresent());

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #17
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
    // no bootstrap servers set in props
    Properties props = new Properties();
    // should throw IllegalArgumentException
    new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}
Example #18
Source File: ShardConsumerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
    StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(
        new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
            fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

    TestSourceContext<String> sourceContext = new TestSourceContext<>();

    TestableKinesisDataFetcher<String> fetcher =
        new TestableKinesisDataFetcher<>(
            Collections.singletonList("fakeStream"),
            sourceContext,
            new Properties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            subscribedShardsStateUnderTest,
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
            Mockito.mock(KinesisProxyInterface.class));

    int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
    new ShardConsumer<>(
        fetcher,
        shardIndex,
        subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
        FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
        new ShardMetricsReporter()).run();

    assertEquals(1000, sourceContext.getCollectedOutputs().size());
    assertEquals(
        SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
Example #19
Source File: ShardConsumerTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
    StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(
        new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
            fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

    TestSourceContext<String> sourceContext = new TestSourceContext<>();

    TestableKinesisDataFetcher<String> fetcher =
        new TestableKinesisDataFetcher<>(
            Collections.singletonList("fakeStream"),
            sourceContext,
            new Properties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            subscribedShardsStateUnderTest,
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
            Mockito.mock(KinesisProxyInterface.class));

    int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
    new ShardConsumer<>(
        fetcher,
        shardIndex,
        subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
        FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
        new ShardMetricsReporter()).run();

    assertEquals(1000, sourceContext.getCollectedOutputs().size());
    assertEquals(
        SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
Example #20
Source File: FlinkKinesisProducerTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testConfigureWithNonSerializableCustomPartitionerFails() {
    exception.expect(IllegalArgumentException.class);
    exception.expectMessage("The provided custom partitioner is not serializable");

    new FlinkKinesisProducer<>(new SimpleStringSchema(), TestUtils.getStandardProperties())
        .setCustomPartitioner(new NonSerializableCustomPartitioner());
}
Example #21
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); // the blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); // sink the data to MySQL

    env.execute("Flink data sink");
}
Example #22
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props))
            .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) // deserialize JSON
            .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                @Override
                public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                    // collect the product id and price fields
                    out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                }
            });
    // product.print();

    // single Redis instance
    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
    product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper())); // the Redis host is usually read from a configuration file

    // Redis cluster
    /* FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
            .setNodes(new HashSet<InetSocketAddress>(
                    Arrays.asList(new InetSocketAddress("redis1", 6379)))).build(); */

    // Redis Sentinels
    /* FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
            .setMasterName("master")
            .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
            .setPassword("")
            .setDatabase(1).build(); */

    env.execute("flink redis connector");
}
Example #23
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    // let the message request return an async exception
    producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

    try {
        testHarness.processElement(new StreamRecord<>("msg-2"));
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #24
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    // let the message request return an async exception
    producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

    try {
        testHarness.snapshot(123L, 123L);
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #25
Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    env.setParallelism(6);

    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
    rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
    rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromLatest();

    env.addSource(kafkaConsumer)
        .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class)) // deserialize JSON
        // hash the log's primary key id with murmur3_128 and use the resulting long as the key
        .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
        .addSink(new KeyedStateDeduplication.KeyedStateSink());

    env.execute("TuningKeyedStateDeduplication");
}
Example #26
Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(6);

    // use RocksDBStateBackend as the state backend and enable incremental checkpoints
    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
            "hdfs:///flink/checkpoints", true);
    rocksDBStateBackend.setNumberOfTransferingThreads(3);
    // use the spinning-disk + memory preset; an SSD is strongly recommended for RocksDB
    rocksDBStateBackend.setPredefinedOptions(
            PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    // checkpoint every 10 minutes
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    // configure checkpointing
    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(
            CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    // Kafka consumer configuration
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    env.addSource(kafkaConsumer)
        .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class)) // deserialize JSON
        .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
        .addSink(new KeyedStateSink());

    env.execute("KeyedStateDeduplication");
}
Example #27
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Ensure that the consumer is properly failing if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
    env.getConfig().disableSysoutLogging();

    // ----------- add consumer ----------

    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    try {
        env.execute("Test auto offset reset none");
    } catch (Throwable e) {
        // check if correct exception has been thrown
        if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset") // kafka 0.8
            && !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
            ) {
            throw e;
        }
    }

    kafkaServer.deleteTestTopic(topic);
}
Example #28
Source File: KafkaConsumer08Test.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testAllBoostrapServerHostsAreInvalid() {
    try {
        String unknownHost = "foobar:11111";

        URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

        PowerMockito.mockStatic(InetAddress.class);
        when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

        String zookeeperConnect = "localhost:56794";
        String groupId = "non-existent-group";
        Properties props = createKafkaProps(zookeeperConnect, unknownHost, groupId);

        FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
            Collections.singletonList("no op topic"),
            new SimpleStringSchema(),
            props);
        StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
        Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
        consumer.setRuntimeContext(mockRuntimeContext);

        consumer.open(new Configuration());

        fail();
    } catch (Exception expected) {
        assertTrue("Exception should be thrown containing 'all bootstrap servers invalid' message!",
            expected.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
                + "' config are invalid"));
    }
}
Example #29
Source File: KafkaConsumer08Test.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testAtLeastOneBootstrapServerHostIsValid() throws Exception {
    try {
        String zookeeperConnect = "localhost:56794";
        String unknownHost = "foobar:11111";
        // we declare one valid bootstrap server, namely the one with 'localhost'
        String bootstrapServers = unknownHost + ", localhost:22222";

        URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

        PowerMockito.mockStatic(InetAddress.class);
        when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

        String groupId = "non-existent-group";
        Properties props = createKafkaProps(zookeeperConnect, bootstrapServers, groupId);
        DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer(
            "no op topic",
            new SimpleStringSchema(),
            props);
        consumer.open(new Configuration());

        // no exception should be thrown, because we have one valid bootstrap server; test passes if we reach here
    } catch (Exception e) {
        assertFalse("No exception should be thrown containing 'all bootstrap servers invalid' message!",
            e.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
                + "' config are invalid"));
    }
}
Example #30
Source File: EmulatedPubSubSinkTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testFlinkSink() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    List<String> input = Arrays.asList("One", "Two", "Three", "Four", "Five",
                                       "Six", "Seven", "Eigth", "Nine", "Ten");

    // Create test stream
    DataStream<String> theData = env
        .fromCollection(input)
        .name("Test input")
        .map((MapFunction<String, String>) StringUtils::reverse);

    // Sink into pubsub
    theData
        .addSink(PubSubSink.newBuilder()
            .withSerializationSchema(new SimpleStringSchema())
            .withProjectName(PROJECT_NAME)
            .withTopicName(TOPIC_NAME)
            // Specific for emulator
            .withHostAndPortForEmulator(getPubSubHostPort())
            .withCredentials(NoCredentials.getInstance())
            .build())
        .name("PubSub sink");

    // Run
    env.execute();

    // Now get the result from PubSub and verify if everything is there
    List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, SUBSCRIPTION_NAME, 100);

    assertEquals("Wrong number of elements", input.size(), receivedMessages.size());

    // Check output strings
    List<String> output = new ArrayList<>();
    receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));

    for (String test : input) {
        assertTrue("Missing " + test, output.contains(StringUtils.reverse(test)));
    }
}