org.apache.flink.streaming.api.environment.LocalStreamEnvironment Java Examples
The following examples show how to use
org.apache.flink.streaming.api.environment.LocalStreamEnvironment.
The project and source file for each example are noted above it.
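Before the per-project examples, here is a minimal, self-contained sketch of the typical lifecycle. It is not taken from any of the projects below; the element values and the job name are illustrative.

import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LocalStreamEnvironmentSketch {

    public static void main(String[] args) throws Exception {
        // createLocalEnvironment() returns a LocalStreamEnvironment that runs
        // the job in the same JVM as the caller, which makes it useful for
        // tests and local debugging.
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);

        // A trivial bounded pipeline: emit three strings and print them.
        env.fromElements("a", "b", "c").print();

        // execute() blocks until the local job finishes.
        env.execute("local-sketch");
    }
}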
Example #1
Source File: CrawlToolIT.java From flink-crawler with Apache License 2.0

@Test
public void test() throws Exception {
    CrawlToolOptions options = new CrawlToolOptions();
    options.setSeedUrlsFilename("./src/it/resources/farsi-seeds.txt");
    options.setCommonCrawlId("2017-22");
    options.setCommonCrawlCacheDir("./target/test/CrawlToolTest/cc-cache/");
    options.setForceCrawlDelay(0L);
    options.setMaxContentSize(100000);
    options.setWARCContentPath("./target/test/CrawlToolIT/output/cc-farsi-content.txt");
    options.setMaxCrawlDuration(20);
    options.setTimeout(10);

    LocalStreamEnvironment env = new LocalStreamEnvironmentWithAsyncExecution();

    CrawlTool.run(env, options);

    // TODO confirm results
}
Example #2
Source File: FlinkUtilsTest.java From flink-crawler with Apache License 2.0

@Test
public void testMakeKeyForOperatorIndex() throws Exception {
    final int parallelism = 2;

    LocalStreamEnvironment env = new LocalStreamEnvironment();
    env.setParallelism(parallelism);
    final int maxParallelism = env.getMaxParallelism();

    DataStreamSource<Tuple2<String, Float>> pages = env.fromElements(
            Tuple2.of("page0", 0.0f),
            Tuple2.of("page0", 1.0f),
            Tuple2.of("page1", 10.0f),
            Tuple2.of("page666", 6660.0f));

    DataStreamSource<Tuple2<String, Float>> epsilon = env.fromElements(
            Tuple2.of(FlinkUtils.makeKeyForOperatorIndex("task:%d", maxParallelism, parallelism, 0), 0.5f),
            Tuple2.of(FlinkUtils.makeKeyForOperatorIndex("task:%d", maxParallelism, parallelism, 1), 0.25f));

    pages.union(epsilon)
            .keyBy(0)
            .process(new MyProcessFunction())
            .print();

    try {
        env.execute();
    } catch (JobExecutionException e) {
        Assert.fail(e.getCause().getMessage());
    }
}
Example #3
Source File: BatchExecutorTest.java From flink with Apache License 2.0

public BatchExecutorTest() {
    batchExecutor = new BatchExecutor(LocalStreamEnvironment.getExecutionEnvironment());

    final Transformation testTransform = new LegacySourceTransformation<>(
            "MockTransform",
            new StreamSource<>(new SourceFunction<String>() {
                @Override
                public void run(SourceContext<String> ctx) {
                }

                @Override
                public void cancel() {
                }
            }),
            BasicTypeInfo.STRING_TYPE_INFO,
            1);
    Pipeline pipeline = batchExecutor.createPipeline(
            Collections.singletonList(testTransform), new TableConfig(), "Test Job");
    streamGraph = (StreamGraph) pipeline;
}
Example #4
Source File: UnalignedCheckpointITCase.java From flink with Apache License 2.0

@Nonnull
private LocalStreamEnvironment createEnv(int parallelism, int slotsPerTaskManager, boolean slotSharing) throws IOException {
    Configuration conf = new Configuration();
    conf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, slotsPerTaskManager);
    conf.setFloat(TaskManagerOptions.NETWORK_MEMORY_FRACTION, .9f);
    conf.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER,
            slotSharing ? (parallelism + slotsPerTaskManager - 1) / slotsPerTaskManager : parallelism * 3);
    conf.setString(CheckpointingOptions.STATE_BACKEND, "filesystem");
    conf.setString(CheckpointingOptions.CHECKPOINTS_DIRECTORY, temp.newFolder().toURI().toString());

    final LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(parallelism, conf);
    env.enableCheckpointing(100);
    // keep in sync with FailingMapper in #createDAG
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, Time.milliseconds(100)));
    env.getCheckpointConfig().enableUnalignedCheckpoints(true);
    return env;
}
Example #5
Source File: FlinkTestUtil.java From AthenaX with Apache License 2.0

static LocalFlinkMiniCluster execute(LocalStreamEnvironment env, Configuration conf, String jobName) throws Exception {
    StreamGraph streamGraph = env.getStreamGraph();
    streamGraph.setJobName(jobName);
    JobGraph jobGraph = streamGraph.getJobGraph();
    Configuration configuration = new Configuration(conf);
    configuration.addAll(jobGraph.getJobConfiguration());
    configuration.setLong("taskmanager.memory.size", -1L);
    configuration.setInteger("taskmanager.numberOfTaskSlots", jobGraph.getMaximumParallelism());

    LocalFlinkMiniCluster cluster = new LocalFlinkMiniCluster(configuration, true);
    cluster.start();
    cluster.submitJobDetached(jobGraph);
    return cluster;
}
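A caller of this helper would build a pipeline on the LocalStreamEnvironment first, then tear the mini cluster down once the test is done. The following is a hypothetical usage sketch, not part of the AthenaX sources, and it assumes the stop() method of the legacy FlinkMiniCluster:

// Hypothetical usage of the helper above (not from the AthenaX sources).
LocalStreamEnvironment env = new LocalStreamEnvironment();
env.fromElements(1, 2, 3).print(); // any small test pipeline

LocalFlinkMiniCluster cluster = FlinkTestUtil.execute(env, new Configuration(), "test-job");
try {
    // ... assert on the detached job's behavior here ...
} finally {
    cluster.stop(); // shut the embedded cluster down
}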
Example #6
Source File: PravegaEventPublisher.java From pravega-samples with Apache License 2.0

private void publishUsingFlinkConnector(AppConfiguration appConfiguration) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Stream streamId = getStreamId();
    FlinkPravegaWriter<Event> writer = FlinkPravegaWriter.<Event>builder()
            .withPravegaConfig(getPravegaConfig())
            .forStream(streamId)
            .withSerializationSchema(PravegaSerialization.serializationFor(Event.class))
            .withEventRouter(new EventRouter())
            .build();

    int parallelism = appConfiguration.getPipeline().getParallelism();

    if (appConfiguration.getProducer().isControlledEnv()) {
        if (!(env instanceof LocalStreamEnvironment)) {
            throw new Exception("Use a local Flink environment or set controlledEnv to false in app.json.");
        }
        //setting this to a single instance since the controlled run allows user input to trigger error events
        env.setParallelism(1);
        long latency = appConfiguration.getProducer().getLatencyInMilliSec();
        int capacity = appConfiguration.getProducer().getCapacity();
        ControlledSourceContextProducer controlledSourceContextProducer =
                new ControlledSourceContextProducer(capacity, latency);
        env.addSource(controlledSourceContextProducer).name("EventSource")
                .addSink(writer).name("Pravega-" + streamId.getStreamName());
    } else {
        env.setParallelism(parallelism);
        SourceContextProducer sourceContextProducer = new SourceContextProducer(appConfiguration);
        env.addSource(sourceContextProducer).name("EventSource")
                .addSink(writer).name("Pravega-" + streamId.getStreamName());
    }

    env.execute(appConfiguration.getName() + "-producer");
}
Example #7
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0

@Test
public void useDefaultParallelismFromContextBatch() {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(TestFlinkRunner.class);

    ExecutionEnvironment bev =
            FlinkExecutionEnvironments.createBatchExecutionEnvironment(options, Collections.emptyList());

    assertThat(bev, instanceOf(LocalEnvironment.class));
    assertThat(options.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
    assertThat(bev.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
}
Example #8
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0

@Test
public void useDefaultParallelismFromContextStreaming() {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(TestFlinkRunner.class);

    StreamExecutionEnvironment sev =
            FlinkExecutionEnvironments.createStreamExecutionEnvironment(options, Collections.emptyList());

    assertThat(sev, instanceOf(LocalStreamEnvironment.class));
    assertThat(options.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
    assertThat(sev.getParallelism(), is(LocalStreamEnvironment.getDefaultLocalParallelism()));
}
Example #9
Source File: LocalStreamEnvironmentITCase.java From flink with Apache License 2.0

/**
 * This test verifies that the execution environment can be used to execute a
 * single job with multiple slots.
 */
@Test
public void testRunIsolatedJob() throws Exception {
    LocalStreamEnvironment env = new LocalStreamEnvironment();
    assertEquals(1, env.getParallelism());

    addSmallBoundedJob(env, 3);
    env.execute();
}
Example #10
Source File: LocalStreamEnvironmentITCase.java From flink with Apache License 2.0

/**
 * This test verifies that the execution environment can be used to execute multiple
 * bounded streaming jobs one after another.
 */
@Test
public void testMultipleJobsAfterAnother() throws Exception {
    LocalStreamEnvironment env = new LocalStreamEnvironment();

    addSmallBoundedJob(env, 3);
    env.execute();

    addSmallBoundedJob(env, 5);
    env.execute();
}
Example #11
Source File: Bootstrap.java From pinpoint with Apache License 2.0

public StreamExecutionEnvironment createStreamExecutionEnvironment() {
    if (flinkConfiguration.isLocalforFlinkStreamExecutionEnvironment()) {
        LocalStreamEnvironment localEnvironment = StreamExecutionEnvironment.createLocalEnvironment();
        localEnvironment.setParallelism(1);
        return localEnvironment;
    } else {
        return StreamExecutionEnvironment.getExecutionEnvironment();
    }
}
Example #12
Source File: ProcessTaxiStream.java From amazon-kinesis-analytics-taxi-consumer with Apache License 2.0

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    ParameterTool parameter;

    if (env instanceof LocalStreamEnvironment) {
        //read the parameters specified from the command line
        parameter = ParameterTool.fromArgs(args);
    } else {
        //read the parameters from the Kinesis Analytics environment
        Map<String, Properties> applicationProperties = KinesisAnalyticsRuntime.getApplicationProperties();

        Properties flinkProperties = applicationProperties.get("FlinkApplicationProperties");

        if (flinkProperties == null) {
            throw new RuntimeException("Unable to load FlinkApplicationProperties properties from the Kinesis Analytics Runtime.");
        }

        parameter = ParameterToolUtils.fromApplicationProperties(flinkProperties);
    }

    //enable event time processing
    if (parameter.get("EventTime", "true").equals("true")) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    }

    //set Kinesis consumer properties
    Properties kinesisConsumerConfig = new Properties();
    //set the region the Kinesis stream is located in
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_REGION, parameter.get("Region", DEFAULT_REGION_NAME));
    //obtain credentials through the DefaultCredentialsProviderChain, which includes the instance metadata
    kinesisConsumerConfig.setProperty(AWSConfigConstants.AWS_CREDENTIALS_PROVIDER, "AUTO");
    //poll new events from the Kinesis stream once every second
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, "1000");

    //create Kinesis source
    DataStream<Event> kinesisStream = env.addSource(new FlinkKinesisConsumer<>(
            //read events from the Kinesis stream passed in as a parameter
            parameter.get("InputStreamName", DEFAULT_STREAM_NAME),
            //deserialize events with EventSchema
            new EventDeserializationSchema(),
            //using the previously defined properties
            kinesisConsumerConfig
    ));

    DataStream<TripEvent> trips = kinesisStream
            //extract watermarks from watermark events
            .assignTimestampsAndWatermarks(new TimestampAssigner())
            //remove all events that aren't TripEvents
            .filter(event -> TripEvent.class.isAssignableFrom(event.getClass()))
            //cast Event to TripEvent
            .map(event -> (TripEvent) event)
            //remove all events with geo coordinates outside of NYC
            .filter(GeoUtils::hasValidCoordinates);

    DataStream<PickupCount> pickupCounts = trips
            //compute geo hash for every event
            .map(new TripToGeoHash())
            .keyBy("geoHash")
            //collect all events in a one hour window
            .timeWindow(Time.hours(1))
            //count events per geo hash in the one hour window
            .apply(new CountByGeoHash());

    DataStream<AverageTripDuration> tripDurations = trips
            .flatMap(new TripToTripDuration())
            .keyBy("pickupGeoHash", "airportCode")
            .timeWindow(Time.hours(1))
            .apply(new TripDurationToAverageTripDuration());

    if (parameter.has("ElasticsearchEndpoint")) {
        String elasticsearchEndpoint = parameter.get("ElasticsearchEndpoint");
        final String region = parameter.get("Region", DEFAULT_REGION_NAME);

        //remove trailing /
        if (elasticsearchEndpoint.endsWith("/")) {
            elasticsearchEndpoint = elasticsearchEndpoint.substring(0, elasticsearchEndpoint.length() - 1);
        }

        pickupCounts.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "pickup_count", "pickup_count"));
        tripDurations.addSink(AmazonElasticsearchSink.buildElasticsearchSink(elasticsearchEndpoint, region, "trip_duration", "trip_duration"));
    }

    LOG.info("Reading events from stream {}", parameter.get("InputStreamName", DEFAULT_STREAM_NAME));

    env.execute();
}
Example #13
Source File: TestUserAgentAnalysisMapperClass.java From yauaa with Apache License 2.0

@Test
public void testClassDefinitionDataStream() throws Exception {
    StreamExecutionEnvironment environment = LocalStreamEnvironment.getExecutionEnvironment();

    DataStream<TestRecord> resultDataStream = environment
            .fromElements(
                    "Mozilla/5.0 (X11; Linux x86_64) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/48.0.2564.82 Safari/537.36",

                    "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/53.0.2785.124 Mobile Safari/537.36"
            )
            .map((MapFunction<String, TestRecord>) TestRecord::new)
            .map(new MyUserAgentAnalysisMapper());

    List<TestRecord> result = new ArrayList<>(5);
    DataStreamUtils
            .collect(resultDataStream)
            .forEachRemaining(result::add);

    assertEquals(2, result.size());

    assertThat(result, hasItems(
            new TestRecord(
                    "Mozilla/5.0 (X11; Linux x86_64) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/48.0.2564.82 Safari/537.36",
                    "Desktop",
                    "Chrome 48.0.2564.82",
                    null),
            new TestRecord(
                    "Mozilla/5.0 (Linux; Android 7.0; Nexus 6 Build/NBD90Z) " +
                            "AppleWebKit/537.36 (KHTML, like Gecko) " +
                            "Chrome/53.0.2785.124 Mobile Safari/537.36",
                    "Phone",
                    "Chrome 53.0.2785.124",
                    null)
    ));
}
Example #14
Source File: PythonEnvironmentFactory.java From Flink-CEPplus with Apache License 2.0

/**
 * Creates a {@link LocalStreamEnvironment}. The local execution environment
 * will run the program in a multi-threaded fashion in the same JVM as the
 * environment was created in. The default parallelism of the local
 * environment is the number of hardware contexts (CPU cores / threads),
 * unless it was specified differently by {@link PythonStreamExecutionEnvironment#set_parallelism(int)}.
 *
 * @param config Pass a custom configuration into the cluster
 * @return A local execution environment with the specified parallelism.
 */
public PythonStreamExecutionEnvironment create_local_execution_environment(Configuration config) {
    return new PythonStreamExecutionEnvironment(new LocalStreamEnvironment(config), new Path(localTmpPath), scriptName);
}
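The javadoc above points at the underlying pattern: passing a Configuration to the LocalStreamEnvironment constructor is how custom settings reach the embedded cluster. A minimal sketch of that pattern in plain Java, assuming an illustrative slot count (the value is not taken from any project above):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;

public class ConfiguredLocalEnvironmentSketch {

    public static void main(String[] args) throws Exception {
        // Build a custom configuration for the embedded local cluster.
        Configuration config = new Configuration();
        config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 4); // illustrative value

        // The configuration is applied when the local cluster starts on execute().
        LocalStreamEnvironment env = new LocalStreamEnvironment(config);
        env.fromElements("a", "b").print();
        env.execute("configured-local-job");
    }
}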