org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer Java Examples
The following examples show how to use
org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer.
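Before the project-specific examples, here is a minimal, self-contained sketch of the pattern nearly all of them share: build a java.util.Properties object naming the AWS region and a credentials source, then pass a stream name, a deserialization schema, and those properties to the FlinkKinesisConsumer constructor. The stream name and region below are placeholders, and the import paths match recent Flink 1.x releases (in older releases SimpleStringSchema lived in org.apache.flink.streaming.util.serialization).

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer;
import org.apache.flink.streaming.connectors.kinesis.config.AWSConfigConstants;
import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants;

public class KinesisConsumerSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties consumerConfig = new Properties();
        // placeholder region; the region of the Kinesis stream is always required
        consumerConfig.setProperty(AWSConfigConstants.AWS_REGION, "us-east-1");
        // resolve credentials through the default provider chain, as the taxi-consumer examples below do
        consumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
        // only read records produced after the job starts
        consumerConfig.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "LATEST");

        // "my-stream" is a placeholder stream name
        DataStream<String> stream = env.addSource(new FlinkKinesisConsumer<>(
                "my-stream", new SimpleStringSchema(), consumerConfig));

        stream.print();
        env.execute("Kinesis consumer sketch");
    }
}

SimpleStringSchema treats each Kinesis record as a UTF-8 string; several examples below substitute custom schemas such as KafkaEventSchema or EventDeserializationSchema to produce typed events instead.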
Example #1
Source File: ConsumeFromKinesis.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    // region and static AWS credentials are passed on the command line
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    // consume the "flink-test" stream as plain strings and print it
    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
        "flink-test",
        new SimpleStringSchema(),
        kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
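ParameterTool.fromArgs parses --key value pairs, so a job built from this example might be submitted with something like flink run consume-from-kinesis.jar --region us-east-1 --accesskey <ACCESS_KEY> --secretkey <SECRET_KEY> (the jar name and region are placeholders). Passing long-lived credentials on the command line is shown here for simplicity; the taxi-consumer examples further down resolve credentials through the default provider chain instead.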
Example #2
Source File: ConsumeFromKinesis.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // region and static AWS credentials are passed on the command line
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    // consume the "flink-test" stream as plain strings and print it
    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
        "flink-test",
        new SimpleStringSchema(),
        kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example #3
Source File: RoutableProtobufKinesisSourceProviderTest.java From flink-statefun with Apache License 2.0
@Test
public void exampleUsage() {
    JsonNode ingressDefinition =
        loadAsJsonFromClassResource(
            getClass().getClassLoader(), "routable-protobuf-kinesis-ingress.yaml");
    JsonIngressSpec<?> spec =
        new JsonIngressSpec<>(
            PolyglotKinesisIOTypes.ROUTABLE_PROTOBUF_KINESIS_INGRESS_TYPE,
            new IngressIdentifier<>(Message.class, "foo", "bar"),
            ingressDefinition);

    RoutableProtobufKinesisSourceProvider provider = new RoutableProtobufKinesisSourceProvider();
    SourceFunction<?> source = provider.forSpec(spec);

    assertThat(source, instanceOf(FlinkKinesisConsumer.class));
}
Example #4
Source File: kda-java-firehose.java From aws-doc-sdk-examples with Apache License 2.0
private static DataStream<String> createSourceFromStaticConfig(StreamExecutionEnvironment env) {
    Properties inputProperties = new Properties();
    inputProperties.setProperty(ConsumerConfigConstants.AWS_REGION, region);
    // LATEST: only read records produced after the job starts
    inputProperties.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "LATEST");

    return env.addSource(new FlinkKinesisConsumer<>(inputStreamName, new SimpleStringSchema(), inputProperties));
}
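STREAM_INITIAL_POSITION set to "LATEST" means the job only sees records produced after it starts. To replay a stream from its oldest retained record, the exactly-once test further down on this page uses TRIM_HORIZON instead; the equivalent setting here would be:

inputProperties.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION,
    ConsumerConfigConstants.InitialPosition.TRIM_HORIZON.name());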
Example #5
Source File: KinesisSourceProvider.java From flink-statefun with Apache License 2.0
@Override
public <T> SourceFunction<T> forSpec(IngressSpec<T> spec) {
    final KinesisIngressSpec<T> kinesisIngressSpec = asKinesisSpec(spec);

    // a single FlinkKinesisConsumer can subscribe to all streams named in the spec
    return new FlinkKinesisConsumer<>(
        kinesisIngressSpec.streams(),
        deserializationSchemaFromSpec(kinesisIngressSpec),
        propertiesFromSpec(kinesisIngressSpec));
}
Example #6
Source File: KinesisSourceProviderTest.java From flink-statefun with Apache License 2.0
@Test
public void exampleUsage() {
    final KinesisIngressSpec<String> kinesisIngressSpec =
        KinesisIngressBuilder.forIdentifier(ID)
            .withAwsRegion("us-west-1")
            .withAwsCredentials(AwsCredentials.basic("access-key-id", "secret-access-key"))
            .withDeserializer(TestDeserializer.class)
            .withStream(STREAM_NAME)
            .build();

    final KinesisSourceProvider provider = new KinesisSourceProvider();
    final SourceFunction<String> source = provider.forSpec(kinesisIngressSpec);

    assertThat(source, instanceOf(FlinkKinesisConsumer.class));
}
Example #7
Source File: kda-java-streams.java From aws-doc-sdk-examples with Apache License 2.0
private static DataStream<String> createSourceFromStaticConfig(StreamExecutionEnvironment env) {
    Properties inputProperties = new Properties();
    inputProperties.setProperty(ConsumerConfigConstants.AWS_REGION, region);
    // LATEST: only read records produced after the job starts
    inputProperties.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "LATEST");

    return env.addSource(new FlinkKinesisConsumer<>(inputStreamName, new SimpleStringSchema(), inputProperties));
}
Example #8
Source File: KinesisExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
        inputStream,
        new KafkaEventSchema(),
        parameterTool.getProperties());
    consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

    Properties producerProperties = new Properties(parameterTool.getProperties());
    // producer needs region even when URL is specified
    producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    // test driver does not deaggregate
    producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

    // KPL does not recognize endpoint URL
    String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
    if (kinesisUrl != null) {
        URL url = new URL(kinesisUrl);
        producerProperties.put("KinesisEndpoint", url.getHost());
        producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
        producerProperties.put("VerifyCertificate", "false");
    }

    FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
        new KafkaEventSchema(),
        producerProperties);
    producer.setDefaultStream(outputStream);
    producer.setDefaultPartition("fakePartition");

    DataStream<KafkaEvent> input = env
        .addSource(consumer)
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(producer);
    env.execute();
}
Example #9
Source File: ExactlyOnceValidatingConsumerThread.java From flink with Apache License 2.0
public static Thread create(final int totalEventCount,
                            final int failAtRecordCount,
                            final int parallelism,
                            final int checkpointInterval,
                            final long restartDelay,
                            final String awsAccessKey,
                            final String awsSecretKey,
                            final String awsRegion,
                            final String kinesisStreamName,
                            final AtomicReference<Throwable> errorHandler,
                            final int flinkPort,
                            final Configuration flinkConfig) {
    Runnable exactlyOnceValidationConsumer = new Runnable() {
        @Override
        public void run() {
            try {
                StreamExecutionEnvironment see = StreamExecutionEnvironment.createRemoteEnvironment(
                    "localhost", flinkPort, flinkConfig);
                see.setParallelism(parallelism);
                see.enableCheckpointing(checkpointInterval);
                // we restart two times
                see.setRestartStrategy(RestartStrategies.fixedDelayRestart(2, restartDelay));

                // consuming topology
                Properties consumerProps = new Properties();
                consumerProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, awsAccessKey);
                consumerProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, awsSecretKey);
                consumerProps.setProperty(ConsumerConfigConstants.AWS_REGION, awsRegion);
                // start reading from beginning
                consumerProps.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION,
                    ConsumerConfigConstants.InitialPosition.TRIM_HORIZON.name());
                DataStream<String> consuming = see.addSource(
                    new FlinkKinesisConsumer<>(kinesisStreamName, new SimpleStringSchema(), consumerProps));
                consuming
                    .flatMap(new ArtificialFailOnceFlatMapper(failAtRecordCount))
                    // validate consumed records for correctness (use only 1 instance to validate all consumed records)
                    .flatMap(new ExactlyOnceValidatingMapper(totalEventCount)).setParallelism(1);

                LOG.info("Starting consuming topology");
                tryExecute(see, "Consuming topo");
                LOG.info("Consuming topo finished");
            } catch (Exception e) {
                LOG.warn("Error while running consuming topology", e);
                errorHandler.set(e);
            }
        }
    };
    return new Thread(exactlyOnceValidationConsumer);
}
Example #10
Source File: kda-java-firehose.java From aws-doc-sdk-examples with Apache License 2.0
private static DataStream<String> createSourceFromApplicationProperties(StreamExecutionEnvironment env) throws IOException {
    // runtime properties are configured on the Kinesis Data Analytics application,
    // grouped under the "ConsumerConfigProperties" key
    Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();
    return env.addSource(new FlinkKinesisConsumer<>(inputStreamName,
        new SimpleStringSchema(),
        applicationProperties.get("ConsumerConfigProperties")));
}
Example #11
Source File: kda-java-streams.java From aws-doc-sdk-examples with Apache License 2.0
private static DataStream<String> createSourceFromApplicationProperties(StreamExecutionEnvironment env) throws IOException {
    // runtime properties are configured on the Kinesis Data Analytics application,
    // grouped under the "ConsumerConfigProperties" key
    Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();
    return env.addSource(new FlinkKinesisConsumer<>(inputStreamName,
        new SimpleStringSchema(),
        applicationProperties.get("ConsumerConfigProperties")));
}
Example #12
Source File: ProcessTaxiStream.java From amazon-kinesis-analytics-taxi-consumer with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    ParameterTool parameter;

    if (env instanceof LocalStreamEnvironment) {
        //read the parameters specified from the command line
        parameter = ParameterTool.fromArgs(args);
    } else {
        //read the parameters from the Kinesis Analytics environment
        Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();

        Properties flinkProperties = applicationProperties.get("FlinkApplicationProperties");

        if (flinkProperties == null) {
            throw new RuntimeException("Unable to load FlinkApplicationProperties properties from the Kinesis Analytics Runtime.");
        }

        parameter = ParameterToolUtils.fromApplicationProperties(flinkProperties);
    }

    //enable event time processing
    if (parameter.get("EventTime", "true").equals("true")) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    }

    //set Kinesis consumer properties
    Properties kinesisConsumerConfig = new Properties();
    //set the region the Kinesis stream is located in
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_REGION, parameter.get("Region", DEFAULT_REGION_NAME));
    //obtain credentials through the DefaultCredentialsProviderChain, which includes the instance metadata
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
    //poll new events from the Kinesis stream once every second
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, "1000");

    //create Kinesis source
    DataStream<Event> kinesisStream = env.addSource(new FlinkKinesisConsumer<>(
        //read events from the Kinesis stream passed in as a parameter
        parameter.get("InputStreamName", DEFAULT_STREAM_NAME),
        //deserialize events with EventSchema
        new EventDeserializationSchema(),
        //using the previously defined properties
        kinesisConsumerConfig
    ));

    DataStream<TripEvent> trips = kinesisStream
        //extract watermarks from watermark events
        .assignTimestampsAndWatermarks(new TimestampAssigner())
        //remove all events that aren't TripEvents
        .filter(event -> TripEvent.class.isAssignableFrom(event.getClass()))
        //cast Event to TripEvent
        .map(event -> (TripEvent) event)
        //remove all events with geo coordinates outside of NYC
        .filter(GeoUtils::hasValidCoordinates);

    DataStream<PickupCount> pickupCounts = trips
        //compute geo hash for every event
        .map(new TripToGeoHash())
        .keyBy("geoHash")
        //collect all events in a one hour window
        .timeWindow(Time.hours(1))
        //count events per geo hash in the one hour window
        .apply(new CountByGeoHash());

    DataStream<AverageTripDuration> tripDurations = trips
        .flatMap(new TripToTripDuration())
        .keyBy("pickupGeoHash", "airportCode")
        .timeWindow(Time.hours(1))
        .apply(new TripDurationToAverageTripDuration());

    if (parameter.has("ElasticsearchEndpoint")) {
        String elasticsearchEndpoint = parameter.get("ElasticsearchEndpoint");
        final String region = parameter.get("Region", DEFAULT_REGION_NAME);

        //remove trailing /
        if (elasticsearchEndpoint.endsWith("/")) {
            elasticsearchEndpoint = elasticsearchEndpoint.substring(0, elasticsearchEndpoint.length() - 1);
        }

        pickupCounts.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "pickup_count", "pickup_count"));
        tripDurations.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "trip_duration", "trip_duration"));
    }

    LOG.info("Reading events from stream {}", parameter.get("InputStreamName", DEFAULT_STREAM_NAME));

    env.execute();
}
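A side note on the windowing calls above: keyBy("geoHash") and timeWindow(Time.hours(1)) are older DataStream APIs that have since been deprecated in Flink. On recent releases the pickup-count step would use a key selector and an explicit window assigner, roughly as sketched below; the getter is an assumption and may not match the actual class in this project.

//hypothetical modern equivalent of the pickup-count window above
trips.map(new TripToGeoHash())
    .keyBy(hashedTrip -> hashedTrip.getGeoHash()) //assumed getter
    .window(TumblingEventTimeWindows.of(Time.hours(1)))
    .apply(new CountByGeoHash());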
Example #13
Source File: ProcessTaxiStreamLocal.java From amazon-kinesis-analytics-taxi-consumer with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    //read the parameters specified from the command line
    ParameterTool parameter = ParameterTool.fromArgs(args);

    Properties kinesisConsumerConfig = new Properties();
    //set the region the Kinesis stream is located in
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_REGION, parameter.get("Region", DEFAULT_REGION_NAME));
    //obtain credentials through the DefaultCredentialsProviderChain, which includes credentials from the instance metadata
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
    //poll new events from the Kinesis stream once every second
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, "1000");

    //create Kinesis source
    DataStream<Event> kinesisStream = env.addSource(new FlinkKinesisConsumer<>(
        //read events from the Kinesis stream passed in as a parameter
        parameter.get("InputStreamName", DEFAULT_STREAM_NAME),
        //deserialize events with EventSchema
        new EventDeserializationSchema(),
        //using the previously defined Kinesis consumer properties
        kinesisConsumerConfig
    ));

    DataStream<TripEvent> trips = kinesisStream
        //remove all events that aren't TripEvents
        .filter(event -> TripEvent.class.isAssignableFrom(event.getClass()))
        //cast Event to TripEvent
        .map(event -> (TripEvent) event)
        //remove all events with geo coordinates outside of NYC
        .filter(GeoUtils::hasValidCoordinates);

    //print trip events to stdout
    trips.print();

    LOG.info("Reading events from stream {}", parameter.get("InputStreamName", DEFAULT_STREAM_NAME));

    env.execute();
}
Example #14
Source File: KinesisExample.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    String inputStream = parameterTool.getRequired("input-stream");
    String outputStream = parameterTool.getRequired("output-stream");

    FlinkKinesisConsumer<KafkaEvent> consumer = new FlinkKinesisConsumer<>(
        inputStream,
        new KafkaEventSchema(),
        parameterTool.getProperties());
    consumer.setPeriodicWatermarkAssigner(new CustomWatermarkExtractor());

    Properties producerProperties = new Properties(parameterTool.getProperties());
    // producer needs region even when URL is specified
    producerProperties.putIfAbsent(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    // test driver does not deaggregate
    producerProperties.putIfAbsent("AggregationEnabled", String.valueOf(false));

    // KPL does not recognize endpoint URL
    String kinesisUrl = producerProperties.getProperty(ConsumerConfigConstants.AWS_ENDPOINT);
    if (kinesisUrl != null) {
        URL url = new URL(kinesisUrl);
        producerProperties.put("KinesisEndpoint", url.getHost());
        producerProperties.put("KinesisPort", Integer.toString(url.getPort()));
        producerProperties.put("VerifyCertificate", "false");
    }

    FlinkKinesisProducer<KafkaEvent> producer = new FlinkKinesisProducer<>(
        new KafkaEventSchema(),
        producerProperties);
    producer.setDefaultStream(outputStream);
    producer.setDefaultPartition("fakePartition");

    DataStream<KafkaEvent> input = env
        .addSource(consumer)
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(producer);
    env.execute();
}
Example #15
Source File: ExactlyOnceValidatingConsumerThread.java From Flink-CEPplus with Apache License 2.0
public static Thread create(final int totalEventCount,
                            final int failAtRecordCount,
                            final int parallelism,
                            final int checkpointInterval,
                            final long restartDelay,
                            final String awsAccessKey,
                            final String awsSecretKey,
                            final String awsRegion,
                            final String kinesisStreamName,
                            final AtomicReference<Throwable> errorHandler,
                            final int flinkPort,
                            final Configuration flinkConfig) {
    Runnable exactlyOnceValidationConsumer = new Runnable() {
        @Override
        public void run() {
            try {
                StreamExecutionEnvironment see = StreamExecutionEnvironment.createRemoteEnvironment(
                    "localhost", flinkPort, flinkConfig);
                see.setParallelism(parallelism);
                see.enableCheckpointing(checkpointInterval);
                // we restart two times
                see.setRestartStrategy(RestartStrategies.fixedDelayRestart(2, restartDelay));

                // consuming topology
                Properties consumerProps = new Properties();
                consumerProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, awsAccessKey);
                consumerProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, awsSecretKey);
                consumerProps.setProperty(ConsumerConfigConstants.AWS_REGION, awsRegion);
                // start reading from beginning
                consumerProps.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION,
                    ConsumerConfigConstants.InitialPosition.TRIM_HORIZON.name());
                DataStream<String> consuming = see.addSource(
                    new FlinkKinesisConsumer<>(kinesisStreamName, new SimpleStringSchema(), consumerProps));
                consuming
                    .flatMap(new ArtificialFailOnceFlatMapper(failAtRecordCount))
                    // validate consumed records for correctness (use only 1 instance to validate all consumed records)
                    .flatMap(new ExactlyOnceValidatingMapper(totalEventCount)).setParallelism(1);

                LOG.info("Starting consuming topology");
                tryExecute(see, "Consuming topo");
                LOG.info("Consuming topo finished");
            } catch (Exception e) {
                LOG.warn("Error while running consuming topology", e);
                errorHandler.set(e);
            }
        }
    };
    return new Thread(exactlyOnceValidationConsumer);
}