org.apache.flink.util.Collector Scala Examples
The following examples show how to use org.apache.flink.util.Collector.
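All of the examples below share one basic contract: a user function receives an input element together with a Collector and calls collect() zero or more times to emit records downstream. As a minimal, self-contained sketch of that pattern (the class name SplitWords and the word-splitting logic are illustrative only, not taken from any of the projects below):

import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

// Emits one record per whitespace-separated token of each input line,
// skipping empty tokens; flatMap may call out.collect any number of times.
class SplitWords extends FlatMapFunction[String, String] {
  override def flatMap(line: String, out: Collector[String]): Unit = {
    line.split("\\s+").filter(_.nonEmpty).foreach(out.collect)
  }
}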
Example 1
Source File: RegressITCase.scala From flink-tensorflow with Apache License 2.0 | 6 votes |
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`, Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
}
Example 2
Source File: TweetReader.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.sketch.utils

import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.util.Collector

class TweetReader(delimiter: String, searchWords: List[String])
  extends RichFlatMapFunction[String, (String, Array[String])] {

  override def flatMap(value: String, out: Collector[(String, Array[String])]): Unit = {
    val id = value.split(delimiter)(0)
    val tweet = value
      .split(delimiter)(5)
      .split(" ")
      .map(_.toLowerCase)
      .filter(searchWords.contains(_))

    if (tweet.nonEmpty) {
      out.collect((id, tweet))
    }
  }
}
Example 3
Source File: TaxiRideProcessor.scala From pipelines-examples with Apache License 2.0 | 5 votes |
package pipelines.examples
package processor

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.functions.co._
import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
import org.apache.flink.util.Collector

import pipelines.streamlets.StreamletShape
import pipelines.streamlets.avro._
import pipelines.flink.avro._
import pipelines.flink._

class TaxiRideProcessor extends FlinkStreamlet {

  // Step 1: Define inlets and outlets. Note for the outlet you need to specify
  //         the partitioner function explicitly: here we are using the
  //         rideId as the partitioner
  @transient val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
  @transient val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
  @transient val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)

  // Step 2: Define the shape of the streamlet. In this example the streamlet
  //         has 2 inlets and 1 outlet
  @transient val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)

  // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
  //         the behavior of the streamlet
  override def createLogic() = new FlinkStreamletLogic {
    override def buildExecutionGraph = {
      val rides: DataStream[TaxiRide] =
        readStream(inTaxiRide)
          .filter { ride ⇒ ride.isStart.booleanValue }
          .keyBy("rideId")

      val fares: DataStream[TaxiFare] =
        readStream(inTaxiFare)
          .keyBy("rideId")

      val processed: DataStream[TaxiRideFare] =
        rides
          .connect(fares)
          .flatMap(new EnrichmentFunction)

      writeStream(out, processed)
    }
  }

  import org.apache.flink.configuration.Configuration

  class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {

    @transient var rideState: ValueState[TaxiRide] = null
    @transient var fareState: ValueState[TaxiFare] = null

    override def open(params: Configuration): Unit = {
      super.open(params)
      rideState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
      fareState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))
    }

    override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
      val fare = fareState.value
      if (fare != null) {
        fareState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        rideState.update(ride)
      }
    }

    override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
      val ride = rideState.value
      if (ride != null) {
        rideState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        fareState.update(fare)
      }
    }
  }
}
Example 4
Source File: ContinueRising.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.cep

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

    val pattern = Pattern.begin[CepDemoEvent]("first")
      .next("second").where(new IterativeCondition[CepDemoEvent] {
        override def filter(currentEvent: CepDemoEvent, context: IterativeCondition.Context[CepDemoEvent]): Boolean = {
          // get the events matched so far for pattern "first"
          val firstList = context.getEventsForPattern("first").iterator()
          var lastStart: CepDemoEvent = null
          // iterate to the last matched event
          while (firstList.hasNext) {
            lastStart = firstList.next()
          }
          if (currentEvent.volume > lastStart.volume) {
            true
          } else {
            false
          }
        }
      })
      // always remember to add within(); it reduces the state usage
      .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)

    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
            events: util.Map[String, util.List[CepDemoEvent]],
            ctx: PatternProcessFunction.Context,
            out: Collector[String]): Unit = {
          // compute the change
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val change = second.volume - first.volume
          out.collect("from : " + first.id + ", to " + second.id + ", change : " + change)
        }
      })

    // for convenience, just print
    result.print()
    env.execute(this.getClass.getName)
  }
}
Example 5
Source File: AfterMatchStrategyDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.cep

import java.util

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

    val noSkip = AfterMatchSkipStrategy.noSkip()

    val pattern = Pattern.begin[CepDemoEvent]("first").where(event => {
      event.name.equals("a")
    })
      // .timesOrMore(1)
      .next("second").where(event => {
        event.name.equals("a")
      })
      .next("third").where(event => {
        event.name.equals("b")
      })
      // .notNext()
      // always remember to add within(); it reduces the state usage
      // .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)

    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
            events: util.Map[String, util.List[CepDemoEvent]],
            ctx: PatternProcessFunction.Context,
            out: Collector[String]): Unit = {
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val third = events.get("third").get(0)
          out.collect("first : " + first + ", second : " + second + ", third : " + third)
        }
      })

    // for convenience, just print
    result.print()
    env.execute(this.getClass.getName)
  }
}
Example 6
Source File: ProcessWindowForTrigger.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

object ProcessWindowDemoForTrigger {

  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
          var count = 0
          elements.iterator.foreach(s => {
            count += 1
          })
          logger.info("this trigger have : {} item", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }
}
Example 7
Source File: MysqlOutputDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.jdbcOutput

import java.io.File

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object MysqlOutputDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val source = new FlinkKafkaConsumer[String]("mysql_output", new SimpleStringSchema, Common.getProp)
    source.setStartFromLatest()
    env.addSource(source)
      .map(li => {
        val tmp = li.split(",")
        new User(tmp(0), tmp(1), tmp(2).toInt, tmp(3))
      })
      // .addSink(new MysqlSink1)
      .writeUsingOutputFormat(new MysqlSink1)

    env.execute("mysqlOutput")
  }
}
Example 8
Source File: BroadCastDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.broadcast

import java.io.File

import com.venn.common.Common
import com.venn.util.StringUtil
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object BroadCastDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    // stream of configuration updates
    val configSource = new FlinkKafkaConsumer[String]("broad_cast_demo", new SimpleStringSchema, Common.getProp)

    // initial configuration, e.g. loaded from a file
    var initFilePath = ""
    if ("/".equals(File.separator)) {
      initFilePath = "hdfs:///venn/init_file.txt"
    } else {
      initFilePath = "D:\\idea_out\\broad_cast.txt"
    }
    val init = env.readTextFile(initFilePath)

    val descriptor = new MapStateDescriptor[String, String]("dynamicConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
    val configStream = env.addSource(configSource).union(init).broadcast(descriptor)

    val input = env.addSource(new RadomFunction)
      .connect(configStream)
      .process(new BroadcastProcessFunction[String, String, String] {

        override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = {
          println("new config : " + value)
          val configMap = ctx.getBroadcastState(descriptor)
          // parse the config update and write it into the broadcast state
          val line = value.split(",")
          configMap.put(line(0), line(1))
        }

        override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = {
          // look up the given key in the broadcast state and return the mapped value
          val configMap = ctx.getBroadcastState(descriptor)
          // parse the three-digit city code and translate it to the city name via the broadcast map
          // println(value)
          val line = value.split(",")
          val code = line(0)
          var va = configMap.get(code)
          // codes that cannot be translated default to 中国(code=xxx)
          if (va == null) {
            va = "中国(code=" + code + ")";
          } else {
            va = va + "(code=" + code + ")"
          }
          out.collect(va + "," + line(1))
        }
      })

    input.print()
    env.execute("BroadCastDemo")
  }
}

class RadomFunction extends SourceFunction[String] {
  var flag = true

  override def cancel(): Unit = {
    flag = false
  }

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
    while (flag) {
      for (i <- 0 to 300) {
        var nu = i.toString
        while (nu.length < 3) {
          nu = "0" + nu
        }
        ctx.collect(nu + "," + StringUtil.getRandomString(5))
        Thread.sleep(2000)
      }
    }
  }
}
Example 9
Source File: BadDataHandler.scala From model-serving-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.modelserving.flink.wine

import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

import scala.util.{Failure, Success, Try}

object BadDataHandler {
  def apply[T] = new BadDataHandler[T]
}

class BadDataHandler[T] extends FlatMapFunction[Try[T], T] {
  override def flatMap(t: Try[T], out: Collector[T]): Unit = {
    t match {
      case Success(t) => out.collect(t)
      case Failure(e) => println(s"BAD DATA: ${e.getMessage}")
    }
  }
}
Example 10
Source File: PSOnlineMatrixFactorizationImplicitTest.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.matrix.factorization

import hu.sztaki.ilab.ps.matrix.factorization.utils.InputTypes
import hu.sztaki.ilab.ps.matrix.factorization.utils.InputTypes.Rating
import hu.sztaki.ilab.ps.matrix.factorization.utils.Utils.{ItemId, UserId}
import hu.sztaki.ilab.ps.matrix.factorization.utils.Vector._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

import scala.collection.mutable

class PSOnlineMatrixFactorizationImplicitTest {
}

object PSOnlineMatrixFactorizationImplicitTest {

  val numFactors = 10
  val rangeMin = -0.1
  val rangeMax = 0.1
  val learningRate = 0.01
  val userMemory = 128
  val negativeSampleRate = 9
  val pullLimit = 1500
  val workerParallelism = 4
  val psParallelism = 4
  val iterationWaitTime = 10000

  def main(args: Array[String]): Unit = {
    val input_file_name = args(0)
    val userVector_output_name = args(1)
    val itemVector_output_name = args(2)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val data = env.readTextFile(input_file_name)

    val lastFM = data.flatMap(new RichFlatMapFunction[String, Rating] {
      override def flatMap(value: String, out: Collector[Rating]): Unit = {
        val fieldsArray = value.split(" ")
        val r = InputTypes.ratingFromTuple(fieldsArray(1).toInt, fieldsArray(2).toInt, 1.0)
        out.collect(r)
      }
    })

    PSOnlineMatrixFactorization.psOnlineMF(
      lastFM,
      numFactors,
      rangeMin,
      rangeMax,
      learningRate,
      negativeSampleRate,
      userMemory,
      pullLimit,
      workerParallelism,
      psParallelism,
      iterationWaitTime)
      .addSink(new RichSinkFunction[Either[(UserId, Vector), (ItemId, Vector)]] {

        val userVectors = new mutable.HashMap[UserId, Vector]
        val itemVectors = new mutable.HashMap[ItemId, Vector]

        override def invoke(value: Either[(UserId, Vector), (ItemId, Vector)]): Unit = {
          value match {
            case Left((userId, vec)) => userVectors.update(userId, vec)
            case Right((itemId, vec)) => itemVectors.update(itemId, vec)
          }
        }

        override def close(): Unit = {
          val userVectorFile = new java.io.PrintWriter(new java.io.File(userVector_output_name))
          for ((k, v) <- userVectors) {
            for (value <- v) {
              userVectorFile.write(k + ";" + value + '\n')
            }
          }
          userVectorFile.close()

          val itemVectorFile = new java.io.PrintWriter(new java.io.File(itemVector_output_name))
          for ((k, v) <- itemVectors) {
            for (value <- v) {
              itemVectorFile.write(k + ";" + value + '\n')
            }
          }
          itemVectorFile.close()
        }
      }).setParallelism(1)

    env.execute()
  }
}
Example 11
Source File: TimeAwareTugOfWar.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.sketch.tug.of.war

import hu.sztaki.ilab.ps.sketch.tug.of.war.pslogic.TimeAwareToWPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import hu.sztaki.ilab.ps.sketch.utils.Utils.Vector
import net.openhft.hashing.LongHashFunction
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

class TimeAwareTugOfWar {
}

object TimeAwareTugOfWar {

  def tugOfWar(src: DataStream[(String, Array[String], Int)],
               numHashes: Int,
               workerParallelism: Int,
               psParallelism: Int,
               iterationWaitTime: Long): DataStream[((Int, Int), Vector)] = {

    val workerLogic = new WorkerLogic[(String, Array[String], Int), Int, (Int, Array[Long]), Any] {

      override def onRecv(data: (String, Array[String], Int),
                          ps: ParameterServerClient[Int, (Int, Array[Long]), Any]): Unit = {
        val id = data._1.toLong
        val tweet = data._2
        val hashArray = (for (i <- 0 to math.ceil(numHashes / 64).toInt) yield LongHashFunction.xx(i).hashLong(id)).toArray
        for (word <- tweet) {
          ps.push(word.hashCode, (data._3, hashArray))
        }
      }

      override def onPullRecv(paramId: Int,
                              paramValue: (Int, Array[Long]),
                              ps: ParameterServerClient[Int, (Int, Array[Long]), Any]): Unit = ???
    }

    val serverLogic = new TimeAwareToWPSLogic(numHashes)

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Any, ((Int, Int), Vector)], ((Int, Int), Vector)] {
        override def flatMap(value: Either[Any, ((Int, Int), Vector)],
                             out: Collector[((Int, Int), Vector)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
}
Example 12
Source File: TugOfWar.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.sketch.tug.of.war

import hu.sztaki.ilab.ps.sketch.utils.Utils._
import hu.sztaki.ilab.ps.sketch.tug.of.war.pslogic.BitSetBasedPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import net.openhft.hashing.LongHashFunction
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

      override def onPullRecv(paramId: Int,
                              paramValue: Array[Long],
                              ps: ParameterServerClient[Int, Array[Long], Array[String]]): Unit = ???
    }

    val serverLogic = new BitSetBasedPSLogic(numHashes)

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Array[String], (Int, Vector)], (Int, Vector)] {
        override def flatMap(value: Either[Array[String], (Int, Vector)],
                             out: Collector[(Int, Vector)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
}
Example 13
Source File: TimeAwareBloomFilter.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.sketch.bloom.filter

import java.lang.Math.floorMod

import hu.sztaki.ilab.ps.sketch.bloom.filter.pslogic.TimeAwareBloomPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import hu.sztaki.ilab.ps.sketch.utils.Utils._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

import scala.collection.mutable

      override def onPullRecv(paramId: Int,
                              paramValue: (Int, Vector),
                              ps: ParameterServerClient[Int, (Int, Vector), Any]): Unit = ???
    }

    val serverLogic = new TimeAwareBloomPSLogic

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Any, ((Int, Int), mutable.BitSet)], ((Int, Int), mutable.BitSet)] {
        override def flatMap(value: Either[Any, ((Int, Int), mutable.BitSet)],
                             out: Collector[((Int, Int), mutable.BitSet)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
}
Example 14
Source File: BloomFilter.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.sketch.bloom.filter

import java.lang.Math.floorMod

import hu.sztaki.ilab.ps.sketch.bloom.filter.pslogic.BloomPSLogic
import hu.sztaki.ilab.ps.{FlinkParameterServer, ParameterServerClient, WorkerLogic}
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
import hu.sztaki.ilab.ps.sketch.utils.Utils._

import scala.collection.mutable

      override def onPullRecv(paramId: Int,
                              paramValue: Vector,
                              ps: ParameterServerClient[Int, Vector, Array[String]]): Unit = ???
    }

    val serverLogic = new BloomPSLogic

    val modelUpdates = FlinkParameterServer.transform(
      src,
      workerLogic,
      serverLogic,
      workerParallelism,
      psParallelism,
      iterationWaitTime)

    modelUpdates
      .flatMap(new RichFlatMapFunction[Either[Array[String], (Int, mutable.BitSet)], (Int, mutable.BitSet)] {
        override def flatMap(value: Either[Array[String], (Int, mutable.BitSet)],
                             out: Collector[(Int, mutable.BitSet)]): Unit = {
          value match {
            case Left(_) =>
            case Right(param) => out.collect(param)
          }
        }
      })
  }
}
Example 15
Source File: TimeAwareTweetReader.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.sketch.utils

import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.util.Collector

class TimeAwareTweetReader(delimiter: String, searchWords: List[String], timeStamp: Long, windowSize: Int)
  extends RichFlatMapFunction[String, (String, Array[String], Int)] {

  override def flatMap(value: String, out: Collector[(String, Array[String], Int)]): Unit = {
    val id = value.split(delimiter)(0)
    val tweet = value.split(delimiter)(5).split(" ").map(_.toLowerCase).filter(searchWords.contains(_))
    val timeSlot = ((value.split(delimiter)(1).toLong - timeStamp) / (windowSize * 60 * 60)).toInt

    if (tweet.nonEmpty) {
      out.collect((id, tweet, timeSlot))
    }
  }
}
Example 16
Source File: AverageSensorReadings.scala From examples-scala with Apache License 2.0 | 5 votes |
package io.github.streamingwithflink.chapter1

import io.github.streamingwithflink.util.{SensorReading, SensorSource, SensorTimeAssigner}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.WindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

  override def apply(
      sensorId: String,
      window: TimeWindow,
      vals: Iterable[SensorReading],
      out: Collector[SensorReading]): Unit = {

    // compute the average temperature
    val (cnt, sum) = vals.foldLeft((0, 0.0))((c, r) => (c._1 + 1, c._2 + r.temperature))
    val avgTemp = sum / cnt

    // emit a SensorReading with the average temperature
    out.collect(SensorReading(sensorId, window.getEnd, avgTemp))
  }
}
Example 17
Source File: DataStreamMatcher.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.cep.flink

import scala.reflect.ClassTag
import dbis.piglet.cep.nfa.NFAController
import dbis.piglet.cep.engines._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.backends.{SchemaClass => Event}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows
import dbis.piglet.cep.ops.MatchCollector
import dbis.piglet.cep.ops.SelectionStrategy
//import org.apache.flink.api.java.operators.CustomUnaryOperation
import scala.collection.mutable.ListBuffer
//import org.apache.flink.api.java.DataSet
//import org.apache.flink.api.java.ExecutionEnvironment
import scala.collection.JavaConversions._
import org.apache.flink.streaming.api.scala._
import dbis.piglet.cep.ops.EngineConf
import org.apache.flink.util.Collector

class DataStreamMatcher[T <: Event: ClassTag: TypeInformation](@transient val input: DataStream[T],
                                                               nfa: NFAController[T],
                                                               flinkEnv: StreamExecutionEnvironment,
                                                               sstr: SelectionStrategy = SelectionStrategy.FirstMatch,
                                                               out: OutputStrategy = Combined)
  extends EngineConf[T](nfa, sstr) with java.io.Serializable {

  object DataStreamProcess {
    def customRun(gw: GlobalWindow, ts: Iterable[T], out: Collector[T]) = {
      ts.foreach { event => engine.runEngine(event) }
      val result = collector.convertEventsToArray()
      result.foreach { res => out.collect(res) }
    }
  }

  def compute(): DataStream[T] = {
    input.windowAll(GlobalWindows.create()).apply(DataStreamProcess.customRun _)
  }
}
Example 18
Source File: FlinkStreamlets.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.flink

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.functions.co._
import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
import org.apache.flink.util.Collector

import cloudflow.streamlets.StreamletShape
import cloudflow.streamlets.avro._
import cloudflow.flink.avro._

object FlinkConnectedProcessor extends FlinkStreamlet {

  // Step 1: Define inlets and outlets. Note for the outlet you need to specify
  //         the partitioner function explicitly
  val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
  val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
  val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)

  // Step 2: Define the shape of the streamlet. In this example the streamlet
  //         has 2 inlets and 1 outlet
  val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)

  // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
  //         the behavior of the streamlet
  override def createLogic() = new FlinkStreamletLogic {
    override def buildExecutionGraph = {
      val rides: DataStream[TaxiRide] =
        readStream(inTaxiRide)
          .filter { ride ⇒ ride.isStart.booleanValue }
          .keyBy("rideId")
      // rides.print()

      val fares: DataStream[TaxiFare] =
        readStream(inTaxiFare)
          .keyBy("rideId")
      // fares.print()

      val processed: DataStream[TaxiRideFare] =
        rides
          .connect(fares)
          .flatMap(new EnrichmentFunction)
      // processed.print()

      writeStream(out, processed)
    }
  }

  class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {

    // keyed, managed state
    lazy val rideState: ValueState[TaxiRide] =
      getRuntimeContext.getState(new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
    lazy val fareState: ValueState[TaxiFare] =
      getRuntimeContext.getState(new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))

    override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
      val fare = fareState.value
      if (fare != null) {
        fareState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        rideState.update(ride)
      }
    }

    override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
      val ride = rideState.value
      if (ride != null) {
        rideState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        fareState.update(fare)
      }
    }
  }
}
Example 19
Source File: SocketGenerator.scala From flink-demos with Apache License 2.0 | 5 votes |
package com.dataartisans.flink.example.eventpattern.Socket

import java.net.{InetAddress, InetSocketAddress}
import java.nio.{ByteOrder, ByteBuffer}
import java.nio.channels.SocketChannel

import com.dataartisans.flink.example.eventpattern.{StandaloneGeneratorBase, Event}
import org.apache.flink.util.Collector

object SocketGenerator extends StandaloneGeneratorBase {

  val BASE_PORT = 51762

  def main(args: Array[String]): Unit = {
    val numPartitions = 4 //args(0).toInt
    val collectors = new Array[SocketCollector](numPartitions)

    // create the generator threads
    for (i <- 0 until collectors.length) {
      collectors(i) = new SocketCollector(BASE_PORT + i)
    }

    runGenerator(collectors)
  }
}

class SocketCollector(val port: Int) extends Collector[Event] {

  val channel = SocketChannel.open(new InetSocketAddress(InetAddress.getByName("localhost"), port))
  channel.configureBlocking(true)
  channel.finishConnect()

  val buffer = ByteBuffer.allocateDirect(4096).order(ByteOrder.LITTLE_ENDIAN)

  override def collect(t: Event): Unit = {
    if (buffer.remaining() < 8) {
      buffer.flip()
      channel.write(buffer)
      buffer.clear()
    }

    buffer.putInt(t.sourceAddress)
    buffer.putInt(t.event)
  }

  override def close(): Unit = {
    if (buffer.position() > 0) {
      buffer.flip()
      channel.write(buffer)
    }
    channel.close()
  }
}
Example 20
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0 | 5 votes |
package com.dataartisans.flink.example.eventpattern

import java.text.SimpleDateFormat
import java.util
import java.util.{Calendar, Properties, UUID}

import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer
import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.util.Collector
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests

class StateMachineMapper extends RichFlatMapFunction[Event, Alert] {

  private[this] var currentState: ValueState[State] = _

  override def open(config: Configuration): Unit = {
    currentState = getRuntimeContext.getState(
      new ValueStateDescriptor("state", classOf[State], InitialState))
  }

  override def flatMap(t: Event, out: Collector[Alert]): Unit = {
    val state = currentState.value()
    val nextState = state.transition(t.event)

    nextState match {
      case InvalidTransition =>
        out.collect(Alert(t.sourceAddress, state, t.event))
      case x if x.terminal =>
        currentState.clear()
      case x =>
        currentState.update(nextState)
    }
  }
}
Example 21
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {

  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    //val socketStream = env.socketTextStream("localhost",9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      }).keyBy(0).process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut: Int) extends ProcessFunction[(String, Long), SessionObj] {

  private var state: ValueState[SessionObj] = null

  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord: SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)
    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) {
      // the session has timed out: clear the state
      state.clear()
    }
  }
}

case class SessionObj(startTime: Long, latestEndTime: Long, heartbeatCount: Int)
Example 22
Source File: LeftJoinKeyedCoProcessFunction.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.internal.JoinLineageRecordFactory
import com.amazon.milan.compiler.flink.metrics.MetricFactory
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import com.amazon.milan.types.LineageRecord
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction
import org.apache.flink.util.{Collector, OutputTag}

object LeftJoinCoProcessFunction {
  val LeftInputRecordsCounterMetricName = "left_input_record_count"
  val RightInputRecordsCounterMetricName = "right_input_record_count"
  val OutputRecordsCounterMetricName = "output_record_count"
}

import com.amazon.milan.compiler.flink.runtime.LeftJoinCoProcessFunction._

abstract class LeftJoinKeyedCoProcessFunction[TLeft >: Null, TRight >: Null, TKey >: Null <: Product, TOut >: Null](
    rightTypeInformation: TypeInformation[TRight],
    keyTypeInformation: TypeInformation[TKey],
    outputTypeInformation: TypeInformation[TOut],
    leftRecordIdExtractor: RecordIdExtractor[TLeft],
    rightRecordIdExtractor: RecordIdExtractor[TRight],
    outputRecordIdExtractor: RecordIdExtractor[TOut],
    lineageRecordFactory: JoinLineageRecordFactory,
    lineageOutputTag: OutputTag[LineageRecord],
    metricFactory: MetricFactory)
  extends KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]
    with ResultTypeQueryable[RecordWrapper[TOut, TKey]] {

  @transient private lazy val canProduceLineage =
    leftRecordIdExtractor.canExtractRecordId &&
      rightRecordIdExtractor.canExtractRecordId &&
      outputRecordIdExtractor.canExtractRecordId
  @transient private lazy val leftInputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, LeftInputRecordsCounterMetricName)
  @transient private lazy val rightInputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, RightInputRecordsCounterMetricName)
  @transient private lazy val outputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, OutputRecordsCounterMetricName)

  @transient private var lastRightValue: ValueState[TRight] = _

  protected def map(left: TLeft, right: TRight): TOut

  protected def postCondition(left: TLeft, right: TRight): Boolean

  override def processElement1(leftRecord: RecordWrapper[TLeft, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.leftInputRecordsCounter.increment()

    val leftValue = leftRecord.value
    val rightValue = this.lastRightValue.value()

    if (this.postCondition(leftValue, rightValue)) {
      val output = this.map(leftValue, rightValue)

      if (output != null) {
        if (this.canProduceLineage) {
          val lineageRecord = this.createLineageRecord(this.outputRecordIdExtractor(output), leftValue, rightValue)
          context.output(this.lineageOutputTag, lineageRecord)
        }

        collector.collect(RecordWrapper.wrap[TOut, TKey](output, leftRecord.key, 0))
        this.outputRecordsCounter.increment()
      }
    }
  }

  override def processElement2(rightRecord: RecordWrapper[TRight, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.rightInputRecordsCounter.increment()
    this.lastRightValue.update(rightRecord.value)
  }

  override def open(parameters: Configuration): Unit = {
    val rightValueDescriptor = new ValueStateDescriptor[TRight]("lastRightValue", this.rightTypeInformation)
    this.lastRightValue = this.getRuntimeContext.getState(rightValueDescriptor)
  }

  override def getProducedType: TypeInformation[RecordWrapper[TOut, TKey]] =
    RecordWrapperTypeInformation.wrap(this.outputTypeInformation, this.keyTypeInformation)

  private def createLineageRecord(outputRecordId: String, leftRecord: TLeft, rightRecord: TRight): LineageRecord = {
    val sourceRecords =
      Option(leftRecord).toSeq.map(r => this.lineageRecordFactory.createLeftRecordPointer(this.leftRecordIdExtractor(r))) ++
        Option(rightRecord).toSeq.map(r => this.lineageRecordFactory.createRightRecordPointer(this.rightRecordIdExtractor(r)))

    this.lineageRecordFactory.createLineageRecord(outputRecordId, sourceRecords)
  }
}
Example 23
Source File: TimeWindowFlatMapProcessWindowFunction.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime

import java.lang
import java.time.Instant

import com.amazon.milan.compiler.flink.TypeUtil
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

abstract class TimeWindowFlatMapProcessWindowFunction[T >: Null, TInKey >: Null <: Product, TOutKey >: Null <: Product](
    recordTypeInfo: TypeInformation[T],
    outKeyTypeInfo: TypeInformation[TOutKey])
  extends ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]
    with ResultTypeQueryable[RecordWrapper[Option[T], TOutKey]] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  protected def addWindowStartTimeToKey(key: TInKey, windowStart: Instant): TOutKey

  override def getProducedType: TypeInformation[RecordWrapper[Option[T], TOutKey]] =
    RecordWrapperTypeInformation.wrap(TypeUtil.createOptionTypeInfo(this.recordTypeInfo), this.outKeyTypeInfo)

  override def process(key: TInKey,
                       context: ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]#Context,
                       items: lang.Iterable[RecordWrapper[Option[T], TInKey]],
                       collector: Collector[RecordWrapper[Option[T], TOutKey]]): Unit = {
    val windowStartTime = Instant.ofEpochMilli(context.window().getStart)

    val record = items.iterator().next()
    val outKey = this.addWindowStartTimeToKey(record.key, windowStartTime)
    val outRecord = RecordWrapper.wrap(record.value, outKey, sequenceNumberHelper.increment())
    collector.collect(outRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
}
Example 24
Source File: UnpackOptionProcessFunction.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import com.typesafe.scalalogging.Logger
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

class UnpackOptionProcessFunction[T >: Null, TKey >: Null <: Product](recordType: TypeInformation[T],
                                                                      keyType: TypeInformation[TKey])
  extends ProcessFunction[RecordWrapper[Option[T], TKey], RecordWrapper[T, TKey]]
    with ResultTypeQueryable[RecordWrapper[T, TKey]] {

  @transient private lazy val logger = Logger(LoggerFactory.getLogger(getClass))

  override def processElement(record: RecordWrapper[Option[T], TKey],
                              context: ProcessFunction[RecordWrapper[Option[T], TKey], RecordWrapper[T, TKey]]#Context,
                              collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    if (record.value.isDefined) {
      collector.collect(RecordWrapper.wrap(record.value.get, record.key, record.sequenceNumber))
    }
  }

  override def getProducedType: TypeInformation[RecordWrapper[T, TKey]] =
    RecordWrapperTypeInformation.wrap(this.recordType, this.keyType)
}
Example 25
Source File: IdentityFlatMapFunction.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.util.Collector

class IdentityFlatMapFunction[T >: Null, TKey >: Null <: Product](recordTypeInformation: TypeInformation[T],
                                                                  keyTypeInformation: TypeInformation[TKey])
  extends FlatMapFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey]]
    with ResultTypeQueryable[RecordWrapper[T, TKey]] {

  override def flatMap(record: RecordWrapper[T, TKey], collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    collector.collect(record)
  }

  override def getProducedType: TypeInformation[RecordWrapper[T, TKey]] =
    RecordWrapperTypeInformation.wrap(this.recordTypeInformation, this.keyTypeInformation)
}
Example 26
Source File: AssignSequenceNumberProcessWindowFunctions.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime

import java.lang

import com.amazon.milan.compiler.flink.types.RecordWrapper
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.windows.Window
import org.apache.flink.util.Collector

class AssignSequenceNumberProcessAllWindowFunction[T >: Null, TKey >: Null <: Product, TWindow <: Window]
  extends ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  override def process(context: ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow]#Context,
                       items: lang.Iterable[RecordWrapper[T, TKey]],
                       collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    val item = items.iterator().next()
    val outputRecord = item.withSequenceNumber(this.sequenceNumberHelper.increment())
    collector.collect(outputRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
}
Example 27
Source File: EvaluationFunctionSpec.scala From flink-jpmml with GNU Affero General Public License v3.0 | 5 votes |
package io.radicalbit.flink.pmml.scala.api.functions

import io.radicalbit.flink.pmml.scala.api.PmmlModel
import io.radicalbit.flink.pmml.scala.api.reader.ModelReader
import io.radicalbit.flink.pmml.scala.models.prediction.{Prediction, Score}
import io.radicalbit.flink.pmml.scala.utils.models.Input
import io.radicalbit.flink.pmml.scala.utils.PmmlLoaderKit
import io.radicalbit.flink.streaming.spec.core.{FlinkPipelineTestKit, FlinkTestKitCompanion}
import org.apache.flink.api.scala.ClosureCleaner
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
import org.scalatest.{Matchers, WordSpecLike}

object EvaluationFunctionSpec extends FlinkTestKitCompanion[Prediction]

class EvaluationFunctionSpec
  extends FlinkPipelineTestKit[Input, Prediction]
    with WordSpecLike
    with Matchers
    with PmmlLoaderKit {

  private implicit val companion = EvaluationFunctionSpec

  private val reader = ModelReader(getPMMLSource(Source.KmeansPmml))

  private def evaluationOperator[T](source: ModelReader)(f: (T, PmmlModel) => Prediction) =
    new EvaluationFunction[T, Prediction](source) {
      override def flatMap(value: T, out: Collector[Prediction]): Unit = out.collect(f(value, evaluator))
    }

  private val operator = evaluationOperator(reader) { (in: Input, model: PmmlModel) =>
    Prediction(Score(1.0))
  }

  private def pipeline(source: DataStream[Input]): DataStream[Prediction] = source.flatMap(operator)

  "EvaluationFunction" should {

    "be Serializable" in {
      noException should be thrownBy ClosureCleaner.clean(operator, checkSerializable = true)
    }

    "return expected behavior on given function" in {
      executePipeline(Seq(Input(1.0, 2.0)))(pipeline) shouldBe Seq(Prediction(Score(1.0)))
    }
  }
}
Example 28
Source File: MultiStreamTransformations.scala From examples-scala with Apache License 2.0 | 5 votes |
package io.github.streamingwithflink.chapter5

import io.github.streamingwithflink.chapter5.util.{Alert, SmokeLevel, SmokeLevelSource}
import io.github.streamingwithflink.chapter5.util.SmokeLevel.SmokeLevel
import io.github.streamingwithflink.util.{SensorReading, SensorSource, SensorTimeAssigner}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

  class RaiseAlertFlatMap extends CoFlatMapFunction[SensorReading, SmokeLevel, Alert] {

    var smokeLevel = SmokeLevel.Low

    override def flatMap1(in1: SensorReading, collector: Collector[Alert]): Unit = {
      // high chance of fire => true
      if (smokeLevel.equals(SmokeLevel.High) && in1.temperature > 100) {
        collector.collect(Alert("Risk of fire!", in1.timestamp))
      }
    }

    override def flatMap2(in2: SmokeLevel, collector: Collector[Alert]): Unit = {
      smokeLevel = in2
    }
  }
}