org.apache.flink.api.common.serialization.SimpleStringSchema Java Examples
The following examples show how to use
org.apache.flink.api.common.serialization.SimpleStringSchema.
Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
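Before the examples, it may help to recall what SimpleStringSchema itself does: it implements both DeserializationSchema<String> and SerializationSchema<String>, converting between Java Strings and raw bytes (UTF-8 by default), which is why a single instance can be handed to the sources and sinks in the examples below. The following standalone sketch is illustrative only (the class name SimpleStringSchemaSketch is not from any of the projects above) and shows the round trip.

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaSketch {

    public static void main(String[] args) throws Exception {
        // The no-arg constructor uses UTF-8; a Charset can also be passed explicitly.
        SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);

        // SerializationSchema side: String -> byte[] (used by producers/sinks).
        byte[] bytes = schema.serialize("hello flink");

        // DeserializationSchema side: byte[] -> String (used by consumers/sources).
        String text = schema.deserialize(bytes);

        // The schema never signals end-of-stream and produces String type information.
        System.out.println(text + " / endOfStream=" + schema.isEndOfStream(text)
                + " / type=" + schema.getProducedType());
    }
}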
Example #1
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;

    // These hard-coded parameters could be placed in a configuration file and read through parameterTool
    final RMQConnectionConfig connectionConfig = new RMQConnectionConfig
            .Builder().setHost("localhost").setVirtualHost("/")
            .setPort(5672).setUserName("admin").setPassword("admin")
            .build();
    DataStreamSource<String> zhisheng = env.addSource(new RMQSource<>(connectionConfig,
            "zhisheng",
            true,
            new SimpleStringSchema()))
            .setParallelism(1);
    zhisheng.print();

    // To guarantee exactly-once or at-least-once, checkpointing must be enabled
    // env.enableCheckpointing(10000);

    env.execute("flink learning connectors rabbitmq");
}
Example #2
Source File: FlinkPulsarITest.java From pulsar-flink with Apache License 2.0 | 6 votes |
@Test
public void testRunFailedOnWrongServiceUrl() {
    try {
        Properties props = MapUtils.toProperties(Collections.singletonMap(TOPIC_SINGLE_OPTION_KEY, "tp"));

        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.getConfig().disableSysoutLogging();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);

        FlinkPulsarSource<String> source =
                new FlinkPulsarSource<String>("sev", "admin", new SimpleStringSchema(), props)
                        .setStartFromEarliest();

        DataStream<String> stream = see.addSource(source);
        stream.print();
        see.execute("wrong service url");
    } catch (Exception e) {
        final Optional<Throwable> optionalThrowable =
                ExceptionUtils.findThrowableWithMessage(e, "authority component is missing");
        assertTrue(optionalThrowable.isPresent());
        assertTrue(optionalThrowable.get() instanceof PulsarClientException);
    }
}
Example #3
Source File: ConsumeFromKinesis.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
        "flink-test",
        new SimpleStringSchema(),
        kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example #4
Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public void runFailOnAutoOffsetResetNoneEager() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    // ----------- add consumer ----------

    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception

    try {
        kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
        fail("should fail with an exception");
    } catch (IllegalArgumentException e) {
        // expected
        assertTrue(e.getMessage().contains("none"));
    }

    kafkaServer.deleteTestTopic(topic);
}
Example #5
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
Example #6
Source File: KinesisConsumerMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    DataStream<String> kinesis = env.addSource(new FlinkKinesisConsumer<>(
            "zhisheng",
            new SimpleStringSchema(),
            kinesisConsumerConfig));
    kinesis.print();

    env.execute();
}
Example #7
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled,
 * the snapshot method does indeed finish without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
    producer.setFlushOnCheckpoint(false);

    final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

    final OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg"));

    // make sure that all callbacks have not been completed
    verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

    // should return even if there are pending records
    testHarness.snapshot(123L, 123L);

    testHarness.close();
}
Example #8
Source File: ConsumeFromKinesis.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
        "flink-test",
        new SimpleStringSchema(),
        kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example #9
Source File: EmulatedPubSubSinkTest.java From flink with Apache License 2.0 | 6 votes |
@Test(expected = Exception.class)
public void testPubSubSinkThrowsExceptionOnFailure() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // Create test stream
    // use source function to prevent the job from shutting down before a checkpoint has been made
    env.addSource(new SingleInputSourceFunction())
        .map((MapFunction<String, String>) StringUtils::reverse)
        .addSink(PubSubSink.newBuilder()
            .withSerializationSchema(new SimpleStringSchema())
            .withProjectName(PROJECT_NAME)
            .withTopicName(TOPIC_NAME)
            // Specific for emulator
            .withHostAndPortForEmulator("unknown-host-to-force-sink-crash:1234")
            .withCredentials(NoCredentials.getInstance())
            .build()).name("PubSub sink");

    // Run
    env.execute();
}
Example #10
Source File: ConsumeFromDynamoDBStreams.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties dynamodbStreamsConsumerConfig = new Properties();
    final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    dynamodbStreamsConsumerConfig.setProperty(
            ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
            streamName,
            new SimpleStringSchema(),
            dynamodbStreamsConsumerConfig));

    dynamodbStreams.print();

    see.execute();
}
Example #11
Source File: KafkaConsumer08Test.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testCreateSourceWithoutCluster() {
    try {
        Properties props = new Properties();
        props.setProperty("zookeeper.connect", "localhost:56794");
        props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
        props.setProperty("group.id", "non-existent-group");
        props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

        FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
            Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
        StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
        Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
        consumer.setRuntimeContext(mockRuntimeContext);

        consumer.open(new Configuration());

        fail();
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
    }
}
Example #12
Source File: KafkaConsumer08Test.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test
public void testCreateSourceWithoutCluster() {
    try {
        Properties props = new Properties();
        props.setProperty("zookeeper.connect", "localhost:56794");
        props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
        props.setProperty("group.id", "non-existent-group");
        props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

        FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
            Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
        StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
        Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
        consumer.setRuntimeContext(mockRuntimeContext);

        consumer.open(new Configuration());

        fail();
    } catch (Exception e) {
        assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
    }
}
Example #13
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests that the constructor defaults missing key/value serializers in the config to ByteArraySerializer.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
    Properties props = new Properties();
    props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
    // should set missing key/value serializers
    new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
    assertTrue(props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
Example #14
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests that partitions list is determinate and correctly provided to custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
    FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

    RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
    when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
    when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

    // out-of-order list of 4 partitions
    List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
    producer.setRuntimeContext(mockRuntimeContext);

    final KafkaProducer mockProducer = producer.getMockKafkaProducer();
    when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
    when(mockProducer.metrics()).thenReturn(null);

    producer.open(new Configuration());
    verify(mockPartitioner, times(1)).open(0, 1);

    producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
    verify(mockPartitioner, times(1)).partition(
        "foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
Example #15
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    // let the message request return an async exception
    producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

    try {
        testHarness.processElement(new StreamRecord<>("msg-2"));
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #16
Source File: FlinkKinesisProducerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
    final DummyFlinkKinesisProducer<String> producer = new DummyFlinkKinesisProducer<>(new SimpleStringSchema());

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    producer.getPendingRecordFutures().get(0).setException(new Exception("artificial async exception"));

    try {
        testHarness.processElement(new StreamRecord<>("msg-2"));
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(ExceptionUtils.findThrowableWithMessage(e, "artificial async exception").isPresent());

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #17
Source File: FlinkKafkaProducerBaseTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
    // no bootstrap servers set in props
    Properties props = new Properties();
    // should throw IllegalArgumentException
    new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}
Example #18
Source File: ShardConsumerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
    StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(
        new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
            fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

    TestSourceContext<String> sourceContext = new TestSourceContext<>();

    TestableKinesisDataFetcher<String> fetcher =
        new TestableKinesisDataFetcher<>(
            Collections.singletonList("fakeStream"),
            sourceContext,
            new Properties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            subscribedShardsStateUnderTest,
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
            Mockito.mock(KinesisProxyInterface.class));

    int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
    new ShardConsumer<>(
        fetcher,
        shardIndex,
        subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
        FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
        new ShardMetricsReporter()).run();

    assertEquals(1000, sourceContext.getCollectedOutputs().size());
    assertEquals(
        SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
Example #19
Source File: ShardConsumerTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
    StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(
        new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
            fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

    TestSourceContext<String> sourceContext = new TestSourceContext<>();

    TestableKinesisDataFetcher<String> fetcher =
        new TestableKinesisDataFetcher<>(
            Collections.singletonList("fakeStream"),
            sourceContext,
            new Properties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            subscribedShardsStateUnderTest,
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
            Mockito.mock(KinesisProxyInterface.class));

    int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
    new ShardConsumer<>(
        fetcher,
        shardIndex,
        subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
        FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
        new ShardMetricsReporter()).run();

    assertEquals(1000, sourceContext.getCollectedOutputs().size());
    assertEquals(
        SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
        subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
Example #20
Source File: FlinkKinesisProducerTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testConfigureWithNonSerializableCustomPartitionerFails() {
    exception.expect(IllegalArgumentException.class);
    exception.expectMessage("The provided custom partitioner is not serializable");

    new FlinkKinesisProducer<>(new SimpleStringSchema(), TestUtils.getStandardProperties())
        .setCustomPartitioner(new NonSerializableCustomPartitioner());
}
Example #21
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); // the blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); // sink the data to MySQL

    env.execute("Flink data sink");
}
Example #22
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props))
            .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) // deserialize JSON
            .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                @Override
                public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                    // collect the product id and price fields
                    out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                }
            });
    // product.print();

    // single Redis instance
    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
    product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper())); // the Redis host is usually read from a configuration file

    // Redis cluster
    /* FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
            .setNodes(new HashSet<InetSocketAddress>(
                    Arrays.asList(new InetSocketAddress("redis1", 6379)))).build(); */

    // Redis Sentinels
    /* FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
            .setMasterName("master")
            .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
            .setPassword("")
            .setDatabase(1).build(); */

    env.execute("flink redis connector");
}
Example #23
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    // let the message request return an async exception
    producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

    try {
        testHarness.processElement(new StreamRecord<>("msg-2"));
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #24
Source File: FlinkKafkaProducerBaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
        FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

    OneInputStreamOperatorTestHarness<String, Object> testHarness =
        new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    testHarness.open();

    testHarness.processElement(new StreamRecord<>("msg-1"));

    // let the message request return an async exception
    producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

    try {
        testHarness.snapshot(123L, 123L);
    } catch (Exception e) {
        // the next invoke should rethrow the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

        // test succeeded
        return;
    }

    Assert.fail();
}
Example #25
Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    env.setParallelism(6);

    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
    rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
    rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromLatest();

    env.addSource(kafkaConsumer)
        .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class)) // deserialize JSON
        // hash the log's primary key id with murmur3_128 and use the resulting long as the key
        .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
        .addSink(new KeyedStateDeduplication.KeyedStateSink());

    env.execute("TuningKeyedStateDeduplication");
}
Example #26
Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(6);

    // use RocksDBStateBackend as the state backend and enable incremental checkpoints
    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
            "hdfs:///flink/checkpoints", true);
    rocksDBStateBackend.setNumberOfTransferingThreads(3);
    // use the spinning-disk + memory preset; an SSD is strongly recommended for RocksDB
    rocksDBStateBackend.setPredefinedOptions(
            PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    // checkpoint every 10 minutes
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    // configure checkpointing
    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(
            CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    // Kafka consumer configuration
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    env.addSource(kafkaConsumer)
        .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class)) // deserialize JSON
        .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
        .addSink(new KeyedStateSink());

    env.execute("KeyedStateDeduplication");
}
Example #27
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Ensure that the consumer is properly failing if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
    env.getConfig().disableSysoutLogging();

    // ----------- add consumer ----------

    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    try {
        env.execute("Test auto offset reset none");
    } catch (Throwable e) {
        // check if correct exception has been thrown
        if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset") // kafka 0.8
            && !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
            ) {
            throw e;
        }
    }

    kafkaServer.deleteTestTopic(topic);
}
Example #28
Source File: KafkaConsumer08Test.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testAllBoostrapServerHostsAreInvalid() {
    try {
        String unknownHost = "foobar:11111";

        URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

        PowerMockito.mockStatic(InetAddress.class);
        when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

        String zookeeperConnect = "localhost:56794";
        String groupId = "non-existent-group";
        Properties props = createKafkaProps(zookeeperConnect, unknownHost, groupId);

        FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
            Collections.singletonList("no op topic"),
            new SimpleStringSchema(),
            props);
        StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
        Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
        consumer.setRuntimeContext(mockRuntimeContext);

        consumer.open(new Configuration());

        fail();
    } catch (Exception expected) {
        assertTrue("Exception should be thrown containing 'all bootstrap servers invalid' message!",
            expected.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
                + "' config are invalid"));
    }
}
Example #29
Source File: KafkaConsumer08Test.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testAtLeastOneBootstrapServerHostIsValid() throws Exception {
    try {
        String zookeeperConnect = "localhost:56794";
        String unknownHost = "foobar:11111";
        // we declare one valid bootstrap server, namely the one with 'localhost'
        String bootstrapServers = unknownHost + ", localhost:22222";

        URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

        PowerMockito.mockStatic(InetAddress.class);
        when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

        String groupId = "non-existent-group";
        Properties props = createKafkaProps(zookeeperConnect, bootstrapServers, groupId);
        DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer(
            "no op topic",
            new SimpleStringSchema(),
            props);
        consumer.open(new Configuration());

        // no exception should be thrown, because we have one valid bootstrap server; test passes if we reach here
    } catch (Exception e) {
        assertFalse("No exception should be thrown containing 'all bootstrap servers invalid' message!",
            e.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
                + "' config are invalid"));
    }
}
Example #30
Source File: EmulatedPubSubSinkTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testFlinkSink() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    List<String> input = Arrays.asList("One", "Two", "Three", "Four", "Five",
                                       "Six", "Seven", "Eigth", "Nine", "Ten");

    // Create test stream
    DataStream<String> theData = env
        .fromCollection(input)
        .name("Test input")
        .map((MapFunction<String, String>) StringUtils::reverse);

    // Sink into pubsub
    theData
        .addSink(PubSubSink.newBuilder()
            .withSerializationSchema(new SimpleStringSchema())
            .withProjectName(PROJECT_NAME)
            .withTopicName(TOPIC_NAME)
            // Specific for emulator
            .withHostAndPortForEmulator(getPubSubHostPort())
            .withCredentials(NoCredentials.getInstance())
            .build())
        .name("PubSub sink");

    // Run
    env.execute();

    // Now get the result from PubSub and verify if everything is there
    List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, SUBSCRIPTION_NAME, 100);

    assertEquals("Wrong number of elements", input.size(), receivedMessages.size());

    // Check output strings
    List<String> output = new ArrayList<>();
    receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));

    for (String test : input) {
        assertTrue("Missing " + test, output.contains(StringUtils.reverse(test)));
    }
}