org.apache.flink.streaming.api.environment.LocalStreamEnvironment Java Examples
The following examples show how to use
org.apache.flink.streaming.api.environment.LocalStreamEnvironment.
The project and source file for each example are noted above it.
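Before the per-project examples, here is a minimal, self-contained sketch of the typical lifecycle. It is not taken from any of the projects below; the element values and the job name are illustrative.

import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LocalStreamEnvironmentSketch {

    public static void main(String[] args) throws Exception {
        // createLocalEnvironment() returns a LocalStreamEnvironment that runs
        // the job in the same JVM as the caller, which makes it useful for
        // tests and local debugging.
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);

        // A trivial bounded pipeline: emit three strings and print them.
        env.fromElements("a", "b", "c").print();

        // execute() blocks until the local job finishes.
        env.execute("local-sketch");
    }
}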
Example #1
Source File: CrawlToolIT.java From flink-crawler with Apache License 2.0

@Test
public void test() throws Exception {
    CrawlToolOptions options = new CrawlToolOptions();
    options.setSeedUrlsFilename("./src/it/resources/farsi-seeds.txt");
    options.setCommonCrawlId("2017-22");
    options.setCommonCrawlCacheDir("./target/test/CrawlToolTest/cc-cache/");
    options.setForceCrawlDelay(0L);
    options.setMaxContentSize(100000);
    options.setWARCContentPath("./target/test/CrawlToolIT/output/cc-farsi-content.txt");
    options.setMaxCrawlDuration(20);
    options.setTimeout(10);

    LocalStreamEnvironment env = new LocalStreamEnvironmentWithAsyncExecution();

    CrawlTool.run(env, options);

    // TODO confirm results
}
Example #2
Source File: FlinkUtilsTest.java From flink-crawler with Apache License 2.0

@Test
public void testMakeKeyForOperatorIndex() throws Exception {
    final int parallelism = 2;

    LocalStreamEnvironment env = new LocalStreamEnvironment();
    env.setParallelism(parallelism);
    final int maxParallelism = env.getMaxParallelism();

    DataStreamSource<Tuple2<String, Float>> pages = env.fromElements(
            Tuple2.of("page0", 0.0f),
            Tuple2.of("page0", 1.0f),
            Tuple2.of("page1", 10.0f),
            Tuple2.of("page666", 6660.0f));

    DataStreamSource<Tuple2<String, Float>> epsilon = env.fromElements(
            Tuple2.of(FlinkUtils.makeKeyForOperatorIndex("task:%d", maxParallelism, parallelism, 0), 0.5f),
            Tuple2.of(FlinkUtils.makeKeyForOperatorIndex("task:%d", maxParallelism, parallelism, 1), 0.25f));

    pages.union(epsilon)
            .keyBy(0)
            .process(new MyProcessFunction())
            .print();

    try {
        env.execute();
    } catch (JobExecutionException e) {
        Assert.fail(e.getCause().getMessage());
    }
}
Example #3
Source File: BatchExecutorTest.java From flink with Apache License 2.0

public BatchExecutorTest() {
    batchExecutor = new BatchExecutor(LocalStreamEnvironment.getExecutionEnvironment());

    final Transformation testTransform = new LegacySourceTransformation<>(
            "MockTransform",
            new StreamSource<>(new SourceFunction<String>() {
                @Override
                public void run(SourceContext<String> ctx) {
                }

                @Override
                public void cancel() {
                }
            }),
            BasicTypeInfo.STRING_TYPE_INFO,
            1);
    Pipeline pipeline = batchExecutor.createPipeline(
            Collections.singletonList(testTransform), new TableConfig(), "Test Job");
    streamGraph = (StreamGraph) pipeline;
}
Example #4
Source File: UnalignedCheckpointITCase.java From flink with Apache License 2.0

@Nonnull
private LocalStreamEnvironment createEnv(int parallelism, int slotsPerTaskManager, boolean slotSharing) throws IOException {
    Configuration conf = new Configuration();
    conf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, slotsPerTaskManager);
    conf.setFloat(TaskManagerOptions.NETWORK_MEMORY_FRACTION, .9f);
    conf.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER,
            slotSharing ? (parallelism + slotsPerTaskManager - 1) / slotsPerTaskManager : parallelism * 3);
    conf.setString(CheckpointingOptions.STATE_BACKEND, "filesystem");
    conf.setString(CheckpointingOptions.CHECKPOINTS_DIRECTORY, temp.newFolder().toURI().toString());

    final LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(parallelism, conf);
    env.enableCheckpointing(100);
    // keep in sync with FailingMapper in #createDAG
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, Time.milliseconds(100)));
    env.getCheckpointConfig().enableUnalignedCheckpoints(true);
    return env;
}
Example #5
Source File: FlinkTestUtil.java From AthenaX with Apache License 2.0

static LocalFlinkMiniCluster execute(LocalStreamEnvironment env, Configuration conf, String jobName) throws Exception {
    StreamGraph streamGraph = env.getStreamGraph();
    streamGraph.setJobName(jobName);
    JobGraph jobGraph = streamGraph.getJobGraph();
    Configuration configuration = new Configuration(conf);
    configuration.addAll(jobGraph.getJobConfiguration());
    configuration.setLong("taskmanager.memory.size", -1L);
    configuration.setInteger("taskmanager.numberOfTaskSlots", jobGraph.getMaximumParallelism());

    LocalFlinkMiniCluster cluster = new LocalFlinkMiniCluster(configuration, true);
    cluster.start();
    cluster.submitJobDetached(jobGraph);
    return cluster;
}
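A caller of this helper would build a pipeline on the LocalStreamEnvironment first, then tear the mini cluster down once the test is done. The following is a hypothetical usage sketch, not part of the AthenaX sources, and it assumes the stop() method of the legacy FlinkMiniCluster:

// Hypothetical usage of the helper above (not from the AthenaX sources).
LocalStreamEnvironment env = new LocalStreamEnvironment();
env.fromElements(1, 2, 3).print(); // any small test pipeline

LocalFlinkMiniCluster cluster = FlinkTestUtil.execute(env, new Configuration(), "test-job");
try {
    // ... assert on the detached job's behavior here ...
} finally {
    cluster.stop(); // shut the embedded cluster down
}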
Example #6
Source File: PravegaEventPublisher.java From pravega-samples with Apache License 2.0

private void publishUsingFlinkConnector(AppConfiguration appConfiguration) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Stream streamId = getStreamId();
    FlinkPravegaWriter<Event> writer = FlinkPravegaWriter.<Event>builder()
            .withPravegaConfig(getPravegaConfig())
            .forStream(streamId)
            .withSerializationSchema(PravegaSerialization.serializationFor(Event.class))
            .withEventRouter(new EventRouter())
            .build();

    int parallelism = appConfiguration.getPipeline().getParallelism();

    if (appConfiguration.getProducer().isControlledEnv()) {
        if (!(env instanceof LocalStreamEnvironment)) {
            throw new Exception("Use a local Flink environment or set controlledEnv to false in app.json.");
        }
        //setting this to a single instance since the controlled run allows user input to trigger error events
        env.setParallelism(1);
        long latency = appConfiguration.getProducer().getLatencyInMilliSec();
        int capacity = appConfiguration.getProducer().getCapacity();
        ControlledSourceContextProducer controlledSourceContextProducer =
                new ControlledSourceContextProducer(capacity, latency);
        env.addSource(controlledSourceContextProducer).name("EventSource")
                .addSink(writer).name("Pravega-" + streamId.getStreamName());
    } else {
        env.setParallelism(parallelism);
        SourceContextProducer sourceContextProducer = new SourceContextProducer(appConfiguration);
        env.addSource(sourceContextProducer).name("EventSource")
                .addSink(writer).name("Pravega-" + streamId.getStreamName());
    }

    env.execute(appConfiguration.getName() + "-producer");
}
Example #7
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0

@Test
public void useDefaultParallelismFromContextBatch() {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(TestFlinkRunner.class);

    ExecutionEnvironment bev =
            FlinkExecutionEnvironments.createBatchExecutionEnvironment(options, Collections.emptyList());

    assertThat(bev, instanceOf(LocalEnvironment.class));
    assertThat(options.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
    assertThat(bev.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
}
Example #8
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0

@Test
public void useDefaultParallelismFromContextStreaming() {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(TestFlinkRunner.class);

    StreamExecutionEnvironment sev =
            FlinkExecutionEnvironments.createStreamExecutionEnvironment(options, Collections.emptyList());

    assertThat(sev, instanceOf(LocalStreamEnvironment.class));
    assertThat(options.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
    assertThat(sev.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
}
Example #9
Source File: LocalStreamEnvironmentITCase.java From flink with Apache License 2.0

/**
 * This test verifies that the execution environment can be used to execute a
 * single job with multiple slots.
 */
@Test
public void testRunIsolatedJob() throws Exception {
    LocalStreamEnvironment env = new LocalStreamEnvironment();
    assertEquals(1, env.getParallelism());

    addSmallBoundedJob(env, 3);
    env.execute();
}
Example #10
Source File: LocalStreamEnvironmentITCase.java From flink with Apache License 2.0

/**
 * This test verifies that the execution environment can be used to execute multiple
 * bounded streaming jobs one after another.
 */
@Test
public void testMultipleJobsAfterAnother() throws Exception {
    LocalStreamEnvironment env = new LocalStreamEnvironment();

    addSmallBoundedJob(env, 3);
    env.execute();

    addSmallBoundedJob(env, 5);
    env.execute();
}
Example #11
Source File: Bootstrap.java From pinpoint with Apache License 2.0

public StreamExecutionEnvironment createStreamExecutionEnvironment() {
    if (flinkConfiguration.isLocalforFlinkStreamExecutionEnvironment()) {
        LocalStreamEnvironment localEnvironment = StreamExecutionEnvironment.createLocalEnvironment();
        localEnvironment.setParallelism(1);
        return localEnvironment;
    } else {
        return StreamExecutionEnvironment.getExecutionEnvironment();
    }
}
Example #12
Source File: ProcessTaxiStream.java From amazon-kinesis-analytics-taxi-consumer with Apache License 2.0

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    ParameterTool parameter;

    if (env instanceof LocalStreamEnvironment) {
        //read the parameters specified from the command line
        parameter = ParameterTool.fromArgs(args);
    } else {
        //read the parameters from the Kinesis Analytics environment
        Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();

        Properties flinkProperties = applicationProperties.get("FlinkApplicationProperties");

        if (flinkProperties == null) {
            throw new RuntimeException("Unable to load FlinkApplicationProperties properties from the Kinesis Analytics Runtime.");
        }

        parameter = ParameterToolUtils.fromApplicationProperties(flinkProperties);
    }

    //enable event time processing
    if (parameter.get("EventTime", "true").equals("true")) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    }

    //set Kinesis consumer properties
    Properties kinesisConsumerConfig = new Properties();
    //set the region the Kinesis stream is located in
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_REGION, parameter.get("Region", DEFAULT_REGION_NAME));
    //obtain credentials through the DefaultCredentialsProviderChain, which includes the instance metadata
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
    //poll new events from the Kinesis stream once every second
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, "1000");

    //create Kinesis source
    DataStream<Event> kinesisStream = env.addSource(new FlinkKinesisConsumer<>(
            //read events from the Kinesis stream passed in as a parameter
            parameter.get("InputStreamName", DEFAULT_STREAM_NAME),
            //deserialize events with EventSchema
            new EventDeserializationSchema(),
            //using the previously defined properties
            kinesisConsumerConfig
    ));

    DataStream<TripEvent> trips = kinesisStream
            //extract watermarks from watermark events
            .assignTimestampsAndWatermarks(new TimestampAssigner())
            //remove all events that aren't TripEvents
            .filter(event -> TripEvent.class.isAssignableFrom(event.getClass()))
            //cast Event to TripEvent
            .map(event -> (TripEvent) event)
            //remove all events with geo coordinates outside of NYC
            .filter(GeoUtils::hasValidCoordinates);

    DataStream<PickupCount> pickupCounts = trips
            //compute geo hash for every event
            .map(new TripToGeoHash())
            .keyBy("geoHash")
            //collect all events in a one hour window
            .timeWindow(Time.hours(1))
            //count events per geo hash in the one hour window
            .apply(new CountByGeoHash());

    DataStream<AverageTripDuration> tripDurations = trips
            .flatMap(new TripToTripDuration())
            .keyBy("pickupGeoHash", "airportCode")
            .timeWindow(Time.hours(1))
            .apply(new TripDurationToAverageTripDuration());

    if (parameter.has("ElasticsearchEndpoint")) {
        String elasticsearchEndpoint = parameter.get("ElasticsearchEndpoint");
        final String region = parameter.get("Region", DEFAULT_REGION_NAME);

        //remove trailing /
        if (elasticsearchEndpoint.endsWith("/")) {
            elasticsearchEndpoint = elasticsearchEndpoint.substring(0, elasticsearchEndpoint.length() - 1);
        }

        pickupCounts.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "pickup_count", "pickup_count"));
        tripDurations.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "trip_duration", "trip_duration"));
    }

    LOG.info("Reading events from stream {}", parameter.get("InputStreamName", DEFAULT_STREAM_NAME));

    env.execute();
}
Example #13
Source File: TestUserAgentAnalysisMapperClass.java From yauaa with Apache License 2.0

@Test
public void testClassDefinitionDataStream() throws Exception {
    StreamExecutionEnvironment environment = LocalStreamEnvironment.getExecutionEnvironment();

    DataStream<TestRecord> resultDataStream = environment
            .fromElements(
                    "Mozilla/5.0 (X11; Linux x86_64) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/48.0.2564.82 Safari/537.36",

                    "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/53.0.2785.124 Mobile Safari/537.36"
            )
            .map((MapFunction<String, TestRecord>) TestRecord::new)
            .map(new MyUserAgentAnalysisMapper());

    List<TestRecord> result = new ArrayList<>(5);
    DataStreamUtils
            .collect(resultDataStream)
            .forEachRemaining(result::add);

    assertEquals(2, result.size());

    assertThat(result, hasItems(
            new TestRecord(
                    "Mozilla/5.0 (X11; Linux x86_64) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/48.0.2564.82 Safari/537.36",
                    "Desktop",
                    "Chrome 48.0.2564.82",
                    null),
            new TestRecord(
                    "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/53.0.2785.124 Mobile Safari/537.36",
                    "Phone",
                    "Chrome 53.0.2785.124",
                    null)
    ));
}
Example #14
Source File: PythonEnvironmentFactory.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates a {@link LocalStreamEnvironment}. The local execution environment
 * will run the program in a multi-threaded fashion in the same JVM as the
 * environment was created in. The default parallelism of the local
 * environment is the number of hardware contexts (CPU cores / threads),
 * unless it was specified differently by {@link PythonStreamExecutionEnvironment#set_parallelism(int)}.
 *
 * @param config Pass a custom configuration into the cluster
 * @return A local execution environment with the specified parallelism.
 */
public PythonStreamExecutionEnvironment create_local_execution_environment(Configuration config) {
    return new PythonStreamExecutionEnvironment(new LocalStreamEnvironment(config), new Path(localTmpPath), scriptName);
}
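The javadoc above points at the underlying pattern: passing a Configuration to the LocalStreamEnvironment constructor is how custom settings reach the embedded cluster. A minimal sketch of that pattern in plain Java, assuming an illustrative slot count (the value is not taken from any project above):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;

public class ConfiguredLocalEnvironmentSketch {

    public static void main(String[] args) throws Exception {
        // Build a custom configuration for the embedded local cluster.
        Configuration config = new Configuration();
        config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 4); // illustrative value

        // The configuration is applied when the local cluster starts on execute().
        LocalStreamEnvironment env = new LocalStreamEnvironment(config);
        env.fromElements("a", "b").print();
        env.execute("configured-local-job");
    }
}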