org.apache.flink.streaming.api.scala.StreamExecutionEnvironment Scala Examples
The following examples show how to use org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.
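Before working through the examples, here is a minimal, self-contained sketch of the usage pattern they all share: obtain a StreamExecutionEnvironment, build a pipeline, and call execute(). The element values and job name are made up for illustration.

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object MinimalStreamExecutionEnvironmentExample {
  def main(args: Array[String]): Unit = {
    // obtain the environment (local or cluster, depending on how the job is launched)
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // build a simple pipeline from an in-memory collection
    env.fromElements(1, 2, 3)
      .map(_ * 2)
      .print()

    // nothing runs until execute() is called
    env.execute("minimal-example")
  }
}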
Example 1
Source File: ConsoleReporterTestJob.scala From flink-stuff with Apache License 2.0 | 6 votes |
package com.jgrier.flinkstuff.jobs

import com.jgrier.flinkstuff.sources.IntegerSource
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.api.scala._

object ConsoleReporterTestJob {
  def main(args: Array[String]) {
    val config = new Configuration()
    config.setString("metrics.reporters", "consoleReporter")
    config.setString("metrics.reporter.consoleReporter.class", "com.jgrier.flinkstuff.metrics.ConsoleReporter")
    config.setString("metrics.reporter.consoleReporter.interval", "10 SECONDS")

    val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(config))
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new IntegerSource(100))

    stream
      .timeWindowAll(Time.seconds(1))
      .sum(0)
      .print

    env.execute("ConsoleReporterTestJob")
  }
}
Example 2
Source File: RegressITCase.scala From flink-tensorflow with Apache License 2.0 | 6 votes |
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`, Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
}
Example 3
Source File: ContinueRising.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.cep

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

    val pattern = Pattern.begin[CepDemoEvent]("first")
      .next("second").where(new IterativeCondition[CepDemoEvent] {
        override def filter(currentEvent: CepDemoEvent, context: IterativeCondition.Context[CepDemoEvent]): Boolean = {
          // iterate over the events already matched for "first" and keep the last one
          val firstList = context.getEventsForPattern("first").iterator()
          var lastStart: CepDemoEvent = null
          while (firstList.hasNext) {
            lastStart = firstList.next()
          }
          currentEvent.volume > lastStart.volume
        }
      })
      // always remember to add within, it will reduce the state usage
      .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)
    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // compute the change between the first and second event
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val change = second.volume - first.volume
          out.collect("from : " + first.id + ", to " + second.id + ", change : " + change)
        }
      })

    // for convenience, just print
    result.print()

    env.execute(this.getClass.getName)
  }
}
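The listing above is an excerpt and omits the enclosing object and the construction of the input stream. A minimal sketch of the missing setup, assuming a simple CepDemoEvent case class with only the id and volume fields the conditions use (the real class in flink-rookie may differ):

package com.venn.cep

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

// hypothetical stand-in for the project's CepDemoEvent
case class CepDemoEvent(id: String, volume: Long)

object ContinueRisingSetupSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // a small in-memory stream stands in for the real source
    val input: DataStream[CepDemoEvent] = env.fromElements(
      CepDemoEvent("1", 10),
      CepDemoEvent("2", 20),
      CepDemoEvent("3", 15))

    // ... pattern definition, CEP.pattern(input, pattern), and env.execute as in the excerpt above ...
  }
}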
Example 4
Source File: SourceFunctionExample.scala From examples-scala with Apache License 2.0 | 5 votes |
package io.github.streamingwithflink.chapter8

import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.{FunctionInitializationContext, FunctionSnapshotContext}
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object SourceFunctionExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val numbers: DataStream[Long] = env.addSource(new CountSource)
    numbers.print()

    env.execute()
  }
}

class CountSource extends SourceFunction[Long] {
  var isRunning: Boolean = true

  override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {
    var cnt: Long = -1
    while (isRunning && cnt < Long.MaxValue) {
      // increment cnt
      cnt += 1
      ctx.collect(cnt)
    }
  }

  override def cancel(): Unit = isRunning = false
}

class ReplayableCountSource extends SourceFunction[Long] with CheckpointedFunction {
  var isRunning: Boolean = true
  var cnt: Long = _
  var offsetState: ListState[Long] = _

  override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {
    while (isRunning && cnt < Long.MaxValue) {
      ctx.getCheckpointLock.synchronized {
        // increment cnt
        cnt += 1
        ctx.collect(cnt)
      }
    }
  }

  override def cancel(): Unit = isRunning = false

  override def snapshotState(snapshotCtx: FunctionSnapshotContext): Unit = {
    // remove previous cnt
    offsetState.clear()
    // add current cnt
    offsetState.add(cnt)
  }

  override def initializeState(initCtx: FunctionInitializationContext): Unit = {
    // obtain operator list state to store the current cnt
    val desc = new ListStateDescriptor[Long]("offset", classOf[Long])
    offsetState = initCtx.getOperatorStateStore.getListState(desc)
    // initialize cnt variable from the checkpoint
    val it = offsetState.get()
    cnt = if (null == it || !it.iterator().hasNext) -1L else it.iterator().next()
  }
}
Example 5
Source File: AfterMatchStrategyDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.cep

import java.util

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

    val noSkip = AfterMatchSkipStrategy.noSkip()

    val pattern = Pattern.begin[CepDemoEvent]("first").where(event => {
      event.name.equals("a")
    })
      // .timesOrMore(1)
      .next("second").where(event => {
        event.name.equals("a")
      })
      .next("third").where(event => {
        event.name.equals("b")
      })
    // .notNext()
    // always remember to add within, it will reduce the state usage
    // .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)
    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // collect the matched events of each pattern step
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val third = events.get("third").get(0)
          out.collect("first : " + first + ", second : " + second + ", third : " + third)
        }
      })

    // for convenience, just print
    result.print()

    env.execute(this.getClass.getName)
  }
}
Example 6
Source File: CacheFile.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source

object CacheFile {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }

    // cache table
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)

    env.addSource(source)
      .map(json => {
        val id = json.get("id").asText()
        val phone = json.get("phone").asText()
        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {
        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {
          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray
            context.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          val name = cache.get(value._1)
          value._1 + "," + value._2 + "," + name
        }
      })
      .print()

    env.execute("cacheFile")
  }
}
Example 7
Source File: CustomerTimerDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.timer

import java.io.File
import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import java.util
import java.util.{Timer, TimerTask}

import org.apache.flink.api.scala._
import com.venn.common.Common
import com.venn.util.TwoStringSource
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer
import org.slf4j.LoggerFactory

      def query() = {
        logger.info("query mysql")
        try {
          Class.forName(driverName)
          conn = DriverManager.getConnection(jdbcUrl, username, password)
          ps = conn.prepareStatement("select id,name from venn.timer")
          val rs = ps.executeQuery

          while (!rs.isClosed && rs.next) {
            val id = rs.getString(1)
            val name = rs.getString(2)
            map.put(id, name)
          }
          logger.info("get config from db size : {}", map.size())
        } catch {
          case e@(_: ClassNotFoundException | _: SQLException) => e.printStackTrace()
        } finally {
          if (conn != null) {
            conn.close()
          }
        }
      }
    })
    // .print()

    val sink = new FlinkKafkaProducer[String]("timer_out"
      , new SimpleStringSchema()
      , Common.getProp)
    stream.addSink(sink)

    env.execute(this.getClass.getName)
  }
}
Example 8
Source File: StreamingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder}
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object StreamingFileSinkDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    // row format
    val sinkRow = StreamingFileSink
      .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8"))
      .withBucketAssigner(new DayBucketAssigner)
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    // use the custom BulkWriterFactory and DayBucketAssigner
    val sinkBuck = StreamingFileSink
      .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory)
      .withBucketAssigner(new DayBucketAssigner())
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        // json.get("date") + "-" + json.toString
        json
      })
      .addSink(sinkBuck)

    env.execute("StreamingFileSink")
  }
}
Example 9
Source File: RollingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.scala._

    val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink")
    sink.setBucketer(new DayBasePathBucketer)
    sink.setWriter(new StringWriter[String])
    sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB
    // sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hours
    // sink.setInProgressPrefix("inProcessPre")
    // sink.setPendingPrefix("pendingpre")
    // sink.setPartPrefix("partPre")

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        json.get("date") + "-" + json.toString
      })
      .addSink(sink)

    env.execute("rollingFileSink")
  }
}
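This excerpt starts after the environment and Kafka source have been created. A rough sketch of the missing part, modeled on the StreamingFileSinkDemo above from the same package and reusing the imports already shown (the topic name and checkpoint settings are assumptions):

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      env.setStateBackend(new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true))
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    }

    // timestamps are parsed from the "date" field with this format
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    // hypothetical topic name; Common.getProp supplies the Kafka properties
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink",
      new JsonNodeDeserializationSchema, Common.getProp)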
Example 10
Source File: ProcessWindowForTrigger.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

object ProcessWindowDemoForTrigger {
  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
          var count = 0
          elements.iterator.foreach(s => {
            count += 1
          })
          logger.info("this trigger have : {} item", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }
}
Example 11
Source File: MysqlOutputDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.jdbcOutput

import java.io.File

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object MysqlOutputDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val source = new FlinkKafkaConsumer[String]("mysql_output", new SimpleStringSchema, Common.getProp)
    source.setStartFromLatest()
    env.addSource(source)
      .map(li => {
        val tmp = li.split(",")
        new User(tmp(0), tmp(1), tmp(2).toInt, tmp(3))
      })
      // .addSink(new MysqlSink1)
      .writeUsingOutputFormat(new MysqlSink1)

    env.execute("msqlOutput")
  }
}
Example 12
Source File: CurrentDayPvCountWaterMark.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import com.venn.source.TumblingEventTimeWindows
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger, CountTrigger}
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}

      .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime)
      .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
      .reduce(new ReduceFunction[Eventx] {
        override def reduce(event1: Eventx, event2: Eventx): Eventx = {
          // println("reduce event : " + event2.toString)
          // val minId: String = if (event1.id.compareTo(event2.id) >= 0) event2.id else event1.id
          // val maxId = if (event1.id.compareTo(event2.id) < 0) event1.id else event2.id
          // val minCreateTime = if (event1.createTime.compareTo(event2.createTime) >= 0) event2.createTime else event1.createTime
          // val maxCreateTime = if (event1.createTime.compareTo(event2.createTime) < 0) event1.createTime else event2.createTime
          // val count = event1.count + event2.count
          // new EventResult(minId, maxId, minCreateTime, maxCreateTime, count)
          new Eventx(event1.id, event2.id, event1.amt + event2.amt)
        }
      })
    // format the output event: connect the min/max id and add the current timestamp
    // .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count))

    stream.print("result : ")

    // execute job
    env.execute("CurrentDayCount")
  }
}
Example 13
Source File: BroadCastDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.broadcast

import java.io.File

import com.venn.common.Common
import com.venn.util.StringUtil
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object BroadCastDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    // stream of configuration updates
    val configSource = new FlinkKafkaConsumer[String]("broad_cast_demo", new SimpleStringSchema, Common.getProp)
    // initial configuration, loaded from a file
    var initFilePath = ""
    if ("/".equals(File.separator)) {
      initFilePath = "hdfs:///venn/init_file.txt"
    } else {
      initFilePath = "D:\\idea_out\\broad_cast.txt"
    }
    val init = env.readTextFile(initFilePath)

    val descriptor = new MapStateDescriptor[String, String]("dynamicConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
    val configStream = env.addSource(configSource).union(init).broadcast(descriptor)

    val input = env.addSource(new RadomFunction)
      .connect(configStream)
      .process(new BroadcastProcessFunction[String, String, String] {
        override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = {
          println("new config : " + value)
          val configMap = ctx.getBroadcastState(descriptor)
          // parse the config record and write it into the broadcast state
          val line = value.split(",")
          configMap.put(line(0), line(1))
        }

        override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = {
          // look up the given key in the broadcast state
          val configMap = ctx.getBroadcastState(descriptor)
          // map the three-digit city code to its Chinese name using the broadcast state
          // println(value)
          val line = value.split(",")
          val code = line(0)
          var va = configMap.get(code)
          // codes that cannot be resolved default to 中国(code=xxx)
          if (va == null) {
            va = "中国(code=" + code + ")"
          } else {
            va = va + "(code=" + code + ")"
          }
          out.collect(va + "," + line(1))
        }
      })

    input.print()
    env.execute("BroadCastDemo")
  }
}

class RadomFunction extends SourceFunction[String] {
  var flag = true

  override def cancel(): Unit = {
    flag = false
  }

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
    while (flag) {
      for (i <- 0 to 300) {
        var nu = i.toString
        while (nu.length < 3) {
          nu = "0" + nu
        }
        ctx.collect(nu + "," + StringUtil.getRandomString(5))
        Thread.sleep(2000)
      }
    }
  }
}
Example 14
Source File: FlinkTestUtils.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.test.utils

import org.apache.flink.runtime.client.JobExecutionException
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object FlinkTestUtils {

  case class SuccessException[T](content: T) extends Exception {
    override def toString: String = s"SuccessException($content)"
  }

  case class NoSuccessExceptionReceived() extends Exception

  def executeWithSuccessCheck[T](env: StreamExecutionEnvironment)(checker: T => Unit): Unit = {
    try {
      env.execute()
      throw NoSuccessExceptionReceived()
    } catch {
      case e: JobExecutionException =>
        val rootCause = Stream.iterate[Throwable](e)(_.getCause()).takeWhile(_ != null).last
        rootCause match {
          case successException: SuccessException[T] =>
            checker(successException.content)
          case otherCause =>
            throw e
        }
      case e: Throwable => throw e
    }
  }
}
Example 15
Source File: FlinkStreamingCEPTest.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.cep.test.flink

import java.io.File

import dbis.piglet.backends.{Record, SchemaClass}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.scalatest._
import org.apache.commons.io.FileUtils
import org.apache.flink.api.scala._
import dbis.piglet.cep.nfa._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.cep.flink.CustomDataStreamMatcher._

import scala.collection.mutable.ArrayBuffer
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows

case class StreamingDoubleRecord(col1: Int, col2: Int) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

object OurStreamingNFA {
  def filter1(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 1
  def filter2(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 2
  def filter3(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 3

  def createNFA = {
    val testNFA: NFAController[StreamingDoubleRecord] = new NFAController()
    val firstState = testNFA.createAndGetStartState("First")
    val secondState = testNFA.createAndGetNormalState("Second")
    val thirdState = testNFA.createAndGetNormalState("Third")
    val finalState = testNFA.createAndGetFinalState("Final")
    val firstEdge = testNFA.createAndGetForwardEdge(filter1)
    val secondEdge = testNFA.createAndGetForwardEdge(filter2)
    val thirdEdge = testNFA.createAndGetForwardEdge(filter3)
    testNFA.createForwardTransition(firstState, firstEdge, secondState)
    testNFA.createForwardTransition(secondState, secondEdge, thirdState)
    testNFA.createForwardTransition(thirdState, thirdEdge, finalState)
    testNFA
  }
}

class FlinkStreamingCEPTest extends FlatSpec with Matchers with BeforeAndAfterEach {
  var resultArray = new ArrayBuffer[StreamingDoubleRecord]

  override def beforeEach() {
    resultArray.clear()
  }

  val sample = Seq(
    StreamingDoubleRecord(1, 1),
    StreamingDoubleRecord(2, 2),
    StreamingDoubleRecord(1, 3),
    StreamingDoubleRecord(2, 4),
    StreamingDoubleRecord(3, 5),
    StreamingDoubleRecord(1, 6),
    StreamingDoubleRecord(2, 7),
    StreamingDoubleRecord(3, 8))

  "Flink Streaming CEP" should "detect the pattern SEQ(A, B, C) with first match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, FirstMatch)
  }

  it should "detect the pattern SEQ(A, B, C) with any match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, AllMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with next match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, NextMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with contiguity match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, ContiguityMatches)
  }
}
Example 16
Source File: SimpleWordCount.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

object SimpleWordCount {

  def main(args: Array[String]) {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val typeOfWindow = args(4)

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // create a stream from Kafka
    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val wordCountStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    // implement word count
    val wordsStream = wordCountStream
      .flatMap(line => line.toUpperCase.split(' '))
      .map(word => (word, 1))
      //.flatMap{_.toUpperCase.split(' ')}
      //.map{ (_,1) }

    val keyValuePair = wordsStream.keyBy(0)

    val countPair = if (typeOfWindow.equals("slidingCount")) {
      // Slide by count: a sliding window of 5 messages that triggers (slides) every 2 messages
      keyValuePair.countWindow(5, 2).sum(1)
    } else if (typeOfWindow.equals("tumbleTime")) {
      // Tumble by time: trigger and slide every 5 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1)
    } else if (typeOfWindow.equals("slidingTime")) {
      // Slide by time: a sliding window of 5 seconds that triggers every 2 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1)
    } else {
      // Tumble by count: trigger every 5 messages
      keyValuePair.countWindow(5).sum(1)
    }

    // print the results
    countPair.print()

    // execute the program
    env.execute("Scala WordCount Example")
  }
}
Example 17
Source File: StreamingSQL.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.table.api.scala._
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.types.Row

object StreamingSQL {

  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val entityCountStream: DataStream[(String, Int)] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))
      .flatMap(line => line.toUpperCase.split(' '))
      .map(word => (word, 1))

    tableEnv.registerDataStream("myTable2", entityCountStream, 'word, 'frequency)

    val roleUp = tableEnv.sqlQuery("SELECT word, SUM(frequency) FROM myTable2 GROUP BY word")

    val typeInfo = createTypeInformation[(String, Int)]
    val outStream = roleUp.toRetractStream(typeInfo)
    outStream.print()

    env.execute("Scala SQL Example")
  }
}

class CustomSinkFunction() extends SinkFunction[Row] {
  @throws[Exception]
  def invoke(value: Row): Unit = {
    // Do something
    println("-" + value)
  }
}
Example 18
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {
  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //val socketStream = env.socketTextStream("localhost",9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      }).keyBy(0).process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut: Int)
  extends ProcessFunction[(String, Long), SessionObj] {

  private var state: ValueState[SessionObj] = null

  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord: SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)
    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) {
      // the session timed out: clear its state
      state.clear()
    }
  }
}

case class SessionObj(startTime: Long, latestEndTime: Long, heartbeatCount: Int)
Example 19
Source File: EventTimeHeartBeatExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.eventtime

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.tmalaska.flinktraining.example.session.HeartBeat
import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.scala._
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010

object EventTimeHeartBeatExample {
  def main(args: Array[String]) {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val typeOfWindow = args(4)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // create a stream from Kafka
    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val heartbeatStream: DataStream[HeartBeat] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))
      .map(json => {
        implicit val formats = DefaultFormats
        read[HeartBeat](json)
      })
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[HeartBeat]() {
        override def getCurrentWatermark: Watermark = {
          new Watermark(System.currentTimeMillis() - 10000)
        }

        override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = {
          element.eventTime
        }
      })

    // count heartbeats per entity
    val entityCount = heartbeatStream
      .map(heartBeat => (heartBeat.entityId, 1))

    val keyValuePair = entityCount.keyBy(0)

    val countPair = if (typeOfWindow.equals("slidingCount")) {
      // Slide by count: a sliding window of 5 messages that triggers (slides) every 2 messages
      keyValuePair.countWindow(5, 2).sum(1)
    } else if (typeOfWindow.equals("tumbleTime")) {
      // Tumble by time: trigger and slide every 5 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1)
    } else if (typeOfWindow.equals("slidingTime")) {
      // Slide by time: a sliding window of 5 seconds that triggers every 2 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1)
    } else {
      // Tumble by count: trigger every 5 messages
      keyValuePair.countWindow(5).sum(1)
    }

    // print the results
    countPair.print()

    // execute the program
    env.execute("Scala WordCount Example")
  }
}

class MessageTimestamp extends AssignerWithPeriodicWatermarks[HeartBeat] {
  override def getCurrentWatermark: Watermark = {
    // TODO
    null
  }

  override def extractTimestamp(t: HeartBeat, l: Long): Long = {
    // TODO
    -1
  }
}
Example 20
Source File: WordCountTimeWindowWithSocket.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._

object WordCountTimeWindowWithSocket {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val text = env.socketTextStream("localhost", 9999)
    val counts = text.flatMap { _.toLowerCase.split(" +") filter { _.nonEmpty } }
      .map { (_, 1) }
      .keyBy(0)
      .timeWindow(Time.seconds(5))
      .sum(1)
    counts.print()

    env.execute("Window Stream WordCount")
  }
}
Example 21
Source File: TableKafkaJsonSQL.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.{StreamTableEnvironment, _}
import org.apache.flink.types.Row

  def output(): Unit = {
    // input records:
    // 1001,zhangsan,100
    // 1002,lisi,2000
    // 1003,wangwu,1200

    // retract stream output:
    // (true,100)
    // (false,100)
    // (true,2100)
    // (false,2100)
    // (true,3300)
  }
}
Example 22
Source File: TableKafkaJsonConnector.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.{StreamTableEnvironment, _}
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}
import org.apache.flink.types.Row

  def output(): Unit = {
    // input records:
    // 1001,zhangsan,100
    // 1002,lisi,2000
    // 1003,wangwu,1200

    // retract stream output:
    // (true,100)
    // (false,100)
    // (true,2100)
    // (false,2100)
    // (true,3300)
  }
}
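The body of this example is missing from the listing; only its imports and expected output survive. A rough sketch of what a Kafka + JSON table connector registration looks like with these descriptor imports follows; the topic, table and field names, server address, and query are assumptions, not the project's actual values, and the descriptor API shown is the Flink 1.10-era one.

import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala._
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}
import org.apache.flink.types.Row

object TableKafkaJsonConnectorSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val settings = EnvironmentSettings.newInstance().inStreamingMode().build()
    val tableEnv = StreamTableEnvironment.create(env, settings)

    // register a table backed by a Kafka topic with JSON-encoded records
    tableEnv
      .connect(new Kafka()
        .version("universal")
        .topic("user")                                   // hypothetical topic
        .property("bootstrap.servers", "localhost:9092")
        .startFromLatest())
      .withFormat(new Json().failOnMissingField(false))
      .withSchema(new Schema()
        .field("id", Types.STRING)
        .field("name", Types.STRING)
        .field("balance", Types.INT))
      .inAppendMode()
      .createTemporaryTable("users")                     // older Flink versions use registerTableSource instead

    // a running sum, printed as a retract stream of (isAccumulate, value) pairs
    val result = tableEnv.sqlQuery("SELECT SUM(balance) FROM users")
    result.toRetractStream[Row].print()

    env.execute("TableKafkaJsonConnectorSketch")
  }
}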
Example 23
Source File: TableFlinkStreamingQuery.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.scala._

    fsTableEnv.registerDataStream("users", words, 'id, 'name)

    // toAppendStream
    val selectAll = fsTableEnv.sqlQuery("SELECT name, id FROM users")
    selectAll.toAppendStream[(String, Int)].print()

    // toRetractStream
    val selectGroupByAll = fsTableEnv.sqlQuery("SELECT name, count(1) AS cnt FROM users GROUP BY name")
    selectGroupByAll.toRetractStream[(String, Long)].print()

    // execute
    fsEnv.execute("TableFlinkStreamingQuery")
  }
}
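The excerpt omits the creation of fsEnv, fsTableEnv, and the words stream it registers. A minimal sketch of that setup, reusing the imports above (the planner settings and sample elements are assumptions):

    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    val fsSettings = EnvironmentSettings.newInstance().inStreamingMode().build()
    val fsTableEnv = StreamTableEnvironment.create(fsEnv, fsSettings)

    // (id, name) tuples; the fields 'id and 'name are mapped positionally on registration
    val words: DataStream[(Int, String)] = fsEnv.fromElements((1, "flink"), (2, "spark"), (3, "flink"))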
Example 24
Source File: TransformationIterate.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{ConnectedStreams, StreamExecutionEnvironment}

object TransformationIterate {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream = env.fromElements(3, 1, 2, 1, 5).map { t: Int => t }
    val iterated = dataStream.iterate((input: ConnectedStreams[Int, Int]) => {
      // define two map functions that handle the two inputs of the ConnectedStreams
      val head = input.map(i => (i + 1), s => s)
      (head.filter(_ % 2 == 0), head.filter(_ % 2 != 0))
    }, 1000)

    // sample output:
    // 5> 3
    iterated.print()

    env.execute()
  }
}
Example 25
Source File: TransformationKeyByAndReduceAndAggregations.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala.{KeyedStream, StreamExecutionEnvironment}

object TransformationKeyByAndReduceAndAggregations {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream = env.fromElements((1, 1), (1, 2), (2, 3), (2, 4))

    // use the first field as the partition key
    val keyedStream: KeyedStream[(Int, Int), Tuple] = dataStream.keyBy(0)

    // rolling reduce: sum the second field
    val reduceStream = keyedStream.reduce { (t1, t2) =>
      (t1._1, t1._2 + t2._2)
    }
    reduceStream.print("reduce") // reduce:8> (2,7)  reduce:6> (1,3)

    // sum the second field per key
    val sumStream = keyedStream.sum(1)
    sumStream.print("sum") // sum:8> (2,7)  sum:6> (1,3)

    // rolling minimum of the second field for each key
    val minStream = keyedStream.min(1)
    minStream.print("min")

    // rolling minimum for each key, returning the element that holds the minimum
    val minByStream = keyedStream.minBy(1)
    minByStream.print("minBy")

    env.execute()
  }
}
Example 26
Source File: TransformationConnect.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala.{ConnectedStreams, DataStream, StreamExecutionEnvironment}

object TransformationConnect {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream1: DataStream[(String, Int)] = env.fromElements(("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5))
    val dataStream2: DataStream[Int] = env.fromElements(1, 2, 3, 4, 5)

    val connectdeStream: ConnectedStreams[(String, Int), Int] = dataStream1.connect(dataStream2)

    val resultStream = connectdeStream.map(new CoMapFunction[(String, Int), Int, (Int, String)] {
      override def map1(in1: (String, Int)): (Int, String) = {
        (in1._2, in1._1)
      }

      override def map2(in2: Int): (Int, String) = {
        (in2, "default")
      }
    })

    resultStream.print()

    env.execute()
  }
}
Example 27
Source File: TransformationUnion.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object TransformationUnion {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream1: DataStream[(String, Int)] = env.fromElements(("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5))
    val dataStream2: DataStream[(String, Int)] = env.fromElements(("a", 6), ("b", 7), ("c", 8), ("d", 9), ("e", 10))
    val dataStream3: DataStream[(String, Int)] = env.fromElements(("a", 11), ("b", 12), ("c", 13), ("d", 14), ("e", 15))

    val unionStream = dataStream1.union(dataStream2)
    val unionStream2 = unionStream.union(dataStream3)
    unionStream2.print()

    env.execute()
  }
}
Example 28
Source File: TransformationSplitAndSelect.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, SplitStream, StreamExecutionEnvironment}

object TransformationSplitAndSelect {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream1: DataStream[(String, Int)] = env.fromElements(("a", 3), ("d", 4), ("c", 2), ("c", 5), ("a", 5))

    // tag each element as "even" or "odd" based on its second field
    val splitStream: SplitStream[(String, Int)] = dataStream1
      .split(t => if (t._2 % 2 == 0) Seq("even") else Seq("odd"))

    val evenStream: DataStream[(String, Int)] = splitStream.select("even")
    val oddStream: DataStream[(String, Int)] = splitStream.select("odd")

    evenStream.print()

    env.execute()
  }
}
Example 29
Source File: TransformationFilter.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object TransformationFilter {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream = env.fromElements(("a", 1), ("b", 2), ("a", 3), ("c", 4))

    // keep only the tuples whose second element is even
    val filter: DataStream[(String, Int)] = dataStream.filter(_._2 % 2 == 0)
    filter.print()

    env.execute()
  }
}
Example 30
Source File: DefaultSaverITCase.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.io import org.apache.flink.contrib.tensorflow.models.savedmodel.DefaultSavedModelLoader import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils} import org.apache.flink.core.fs.Path import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Matchers, WordSpecLike} import org.tensorflow.{Session, Tensor} import scala.collection.JavaConverters._ @RunWith(classOf[JUnitRunner]) class DefaultSaverITCase extends WordSpecLike with Matchers with FlinkTestBase { override val parallelism = 1 "A DefaultSaver" should { "run the save op" in { val env = StreamExecutionEnvironment.getExecutionEnvironment RegistrationUtils.registerTypes(env.getConfig) val loader = new DefaultSavedModelLoader(new Path("../models/half_plus_two"), "serve") val bundle = loader.load() val saverDef = loader.metagraph.getSaverDef val saver = new DefaultSaver(saverDef) def getA = getVariable(bundle.session(), "a").floatValue() def setA(value: Float) = setVariable(bundle.session(), "a", Tensor.create(value)) val initialA = getA println("Initial value: " + initialA) setA(1.0f) val savePath = tempFolder.newFolder("model-0").getAbsolutePath val path = saver.save(bundle.session(), savePath) val savedA = getA savedA shouldBe (1.0f) println("Saved value: " + getA) setA(2.0f) val updatedA = getA updatedA shouldBe (2.0f) println("Updated value: " + updatedA) saver.restore(bundle.session(), path) val restoredA = getA restoredA shouldBe (savedA) println("Restored value: " + restoredA) } def getVariable(sess: Session, name: String): Tensor = { val result = sess.runner().fetch(name).run().asScala result.head } def setVariable(sess: Session, name: String, value: Tensor): Unit = { sess.runner() .addTarget(s"$name/Assign") .feed(s"$name/initial_value", value) .run() } } }