org.apache.flink.configuration.Configuration Scala Examples
The following examples show how to use org.apache.flink.configuration.Configuration.
Each example is drawn from an open-source project; the source file, project, and license are noted above each listing.
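Before the project-specific examples, a minimal sketch of the Configuration API itself may help: it is essentially a typed key-value map that Flink hands to rich functions through open(...) and accepts when constructing local environments. The sketch below only uses basic setters and getters; the keys (my.app.*) and the object name are made-up illustrations, not built-in Flink options.

import org.apache.flink.configuration.Configuration

object ConfigurationBasics {
  def main(args: Array[String]): Unit = {
    val config = new Configuration()

    // write values under string keys (these keys are arbitrary examples)
    config.setString("my.app.name", "demo")
    config.setInteger("my.app.retries", 3)
    config.setBoolean("my.app.verbose", true)

    // read them back, supplying a default for absent keys
    val name = config.getString("my.app.name", "unknown")
    val retries = config.getInteger("my.app.retries", 1)
    val verbose = config.getBoolean("my.app.verbose", false)

    println(s"name=$name retries=$retries verbose=$verbose")
  }
}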
Example 1
Source File: ConsoleReporterTestJob.scala From flink-stuff with Apache License 2.0
package com.jgrier.flinkstuff.jobs

import com.jgrier.flinkstuff.sources.IntegerSource
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.api.scala._

object ConsoleReporterTestJob {
  def main(args: Array[String]) {
    val config = new Configuration()
    config.setString("metrics.reporters", "consoleReporter")
    config.setString("metrics.reporter.consoleReporter.class", "com.jgrier.flinkstuff.metrics.ConsoleReporter")
    config.setString("metrics.reporter.consoleReporter.interval", "10 SECONDS")

    val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(config))
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new IntegerSource(100))

    stream
      .timeWindowAll(Time.seconds(1))
      .sum(0)
      .print

    env.execute("ConsoleReporterTestJob")
  }
}
Example 2
Source File: RegressITCase.scala From flink-tensorflow with Apache License 2.0
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`, Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
}
Example 3
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0
package com.dataartisans.flink.example.eventpattern

import java.text.SimpleDateFormat
import java.util
import java.util.{Calendar, Properties, UUID}

import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer
import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.util.Collector
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests

class StateMachineMapper extends RichFlatMapFunction[Event, Alert] {

  private[this] var currentState: ValueState[State] = _

  override def open(config: Configuration): Unit = {
    currentState = getRuntimeContext.getState(
      new ValueStateDescriptor("state", classOf[State], InitialState))
  }

  override def flatMap(t: Event, out: Collector[Alert]): Unit = {
    val state = currentState.value()
    val nextState = state.transition(t.event)

    nextState match {
      case InvalidTransition =>
        out.collect(Alert(t.sourceAddress, state, t.event))
      case x if x.terminal =>
        currentState.clear()
      case x =>
        currentState.update(nextState)
    }
  }
}
Example 4
Source File: TaxiRideProcessor.scala From pipelines-examples with Apache License 2.0
package pipelines.examples
package processor

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.functions.co._
import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
import org.apache.flink.util.Collector

import pipelines.streamlets.StreamletShape
import pipelines.streamlets.avro._
import pipelines.flink.avro._
import pipelines.flink._

class TaxiRideProcessor extends FlinkStreamlet {

  // Step 1: Define inlets and outlets. Note for the outlet you need to specify
  //         the partitioner function explicitly: here we are using the
  //         rideId as the partitioner
  @transient val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
  @transient val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
  @transient val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)

  // Step 2: Define the shape of the streamlet. In this example the streamlet
  //         has 2 inlets and 1 outlet
  @transient val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)

  // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
  //         the behavior of the streamlet
  override def createLogic() = new FlinkStreamletLogic {
    override def buildExecutionGraph = {
      val rides: DataStream[TaxiRide] =
        readStream(inTaxiRide)
          .filter { ride ⇒ ride.isStart.booleanValue }
          .keyBy("rideId")

      val fares: DataStream[TaxiFare] =
        readStream(inTaxiFare)
          .keyBy("rideId")

      val processed: DataStream[TaxiRideFare] =
        rides
          .connect(fares)
          .flatMap(new EnrichmentFunction)

      writeStream(out, processed)
    }
  }

  import org.apache.flink.configuration.Configuration

  class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {

    @transient var rideState: ValueState[TaxiRide] = null
    @transient var fareState: ValueState[TaxiFare] = null

    override def open(params: Configuration): Unit = {
      super.open(params)
      rideState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
      fareState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))
    }

    override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
      val fare = fareState.value
      if (fare != null) {
        fareState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        rideState.update(ride)
      }
    }

    override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
      val ride = rideState.value
      if (ride != null) {
        rideState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        fareState.update(fare)
      }
    }
  }
}
Example 5
Source File: CacheFile.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source

object CacheFile {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }

    // cache table
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)

    env.addSource(source)
      .map(json => {
        val id = json.get("id").asText()
        val phone = json.get("phone").asText()
        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {

        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {
          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray
            context.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          // look up the cached name for this id (empty string if absent)
          val name = cache.getOrElse(value._1, "")
          value._1 + "," + value._2 + "," + name
        }
      })
      .print()

    env.execute("cacheFile")
  }
}
Example 6
Source File: CustomerTimerDemo.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.timer

import java.io.File
import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import java.util
import java.util.{Timer, TimerTask}

import org.apache.flink.api.scala._
import com.venn.common.Common
import com.venn.util.TwoStringSource
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer
import org.slf4j.LoggerFactory

// (excerpt: the enclosing definitions of env, stream, logger, conn, ps, map and
//  the JDBC settings are not shown in this listing)

    def query() = {
      logger.info("query mysql")
      try {
        Class.forName(driverName)
        conn = DriverManager.getConnection(jdbcUrl, username, password)
        ps = conn.prepareStatement("select id,name from venn.timer")
        val rs = ps.executeQuery

        while (!rs.isClosed && rs.next) {
          val id = rs.getString(1)
          val name = rs.getString(2)
          map.put(id, name)
        }
        logger.info("get config from db size : {}", map.size())
      } catch {
        case e @ (_: ClassNotFoundException | _: SQLException) =>
          e.printStackTrace()
      } finally {
        if (conn != null) {
          conn.close()
        }
      }
    }
  })
  // .print()

  val sink = new FlinkKafkaProducer[String]("timer_out",
    new SimpleStringSchema(),
    Common.getProp)
  stream.addSink(sink)

  env.execute(this.getClass.getName)
  }
}
Example 7
Source File: MysqlSink1.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}

import org.apache.flink.api.common.io.OutputFormat
import org.apache.flink.configuration.Configuration
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink1 extends OutputFormat[User] {

  val logger: Logger = LoggerFactory.getLogger("MysqlSink1")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def configure(parameters: Configuration): Unit = {
    // not needed
  }

  override def open(taskNumber: Int, numTasks: Int): Unit = {
    try {
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)
      // disable auto commit
      conn.setAutoCommit(false)
    } catch {
      case e @ (_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error")
        e.printStackTrace()
        System.exit(-1)
    }
  }

  override def writeRecord(user: User): Unit = {
    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)
    ps.execute()
    conn.commit()
  }

  override def close(): Unit = {
    if (conn != null) {
      conn.commit()
      conn.close()
    }
  }
}
Example 8
Source File: MysqlSink.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink extends RichSinkFunction[User] {

  val logger: Logger = LoggerFactory.getLogger("MysqlSink")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def open(parameters: Configuration): Unit = {
    try {
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)
      // disable auto commit
      conn.setAutoCommit(false)
    } catch {
      case e @ (_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error")
        e.printStackTrace()
        System.exit(-1)
    }
  }

  override def invoke(user: User, context: SinkFunction.Context[_]): Unit = {
    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)
    ps.execute()
    conn.commit()
  }

  override def close(): Unit = {
    if (conn != null) {
      conn.commit()
      conn.close()
    }
  }
}
Example 9
Source File: RangePSLogicWithClose.scala From flink-parameter-server with Apache License 2.0
package hu.sztaki.ilab.ps.server

import hu.sztaki.ilab.ps.{ParameterServer, ParameterServerLogic}
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.configuration.Configuration

class RangePSLogicWithClose[P](featureCount: Int, paramInit: => Int => P, paramUpdate: => (P, P) => P)
  extends ParameterServerLogic[Int, P, (Int, P)] {

  var startIndex = 0
  var params: Array[Option[P]] = _

  @transient lazy val init: (Int) => P = paramInit
  @transient lazy val update: (P, P) => P = paramUpdate

  override def onPullRecv(id: Int, workerPartitionIndex: Int, ps: ParameterServer[Int, P, (Int, P)]): Unit = {
    if (id - startIndex < 0) {
      println(id)
      println(params.mkString("[", ",", "]"))
    }
    ps.answerPull(id, params(id - startIndex) match {
      case Some(e) => e
      case None =>
        val ini = init(id)
        params(id - startIndex) = Some(ini)
        ini
    }, workerPartitionIndex)
  }

  override def onPushRecv(id: Int, deltaUpdate: P, ps: ParameterServer[Int, P, (Int, P)]): Unit = {
    val index = id - startIndex
    val c = params(index) match {
      case Some(q) => update(q, deltaUpdate)
      case None => deltaUpdate
    }
    params(index) = Some(c)
  }

  override def open(parameters: Configuration, runtimeContext: RuntimeContext): Unit = {
    super.open(parameters, runtimeContext)
    val div = Math.ceil(featureCount.toDouble / runtimeContext.getNumberOfParallelSubtasks).toInt
    val mod = featureCount - (runtimeContext.getNumberOfParallelSubtasks - 1) * div
    params = Array.fill[Option[P]](
      if (mod != 0 && runtimeContext.getIndexOfThisSubtask + 1 == runtimeContext.getNumberOfParallelSubtasks) {
        mod
      } else {
        div
      })(None)
    startIndex = runtimeContext.getIndexOfThisSubtask * div
  }
}
Example 10
Source File: DemoStreamEnvironment.scala From flink-streaming-demo with Apache License 2.0
package com.dataartisans.flink_demo.utils

import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment

object DemoStreamEnvironment {

  def env: StreamExecutionEnvironment = {
    val config = new Configuration()
    // start the web dashboard
    config.setBoolean(ConfigConstants.LOCAL_START_WEBSERVER, true)
    // required to start the web dashboard
    config.setString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, "./data/dummyLogFile.txt")

    // create a local stream execution environment
    new LocalStreamEnvironment(config)
  }
}
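The ConfigConstants keys above come from older Flink releases. On more recent versions (roughly 1.5 onwards) the same effect is usually achieved with RestOptions and createLocalEnvironmentWithWebUI; the sketch below assumes such a Flink version, is not part of the original project, and uses an arbitrary object name.

import org.apache.flink.configuration.{Configuration, RestOptions}
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment

object DemoStreamEnvironmentNewer {

  def env: StreamExecutionEnvironment = {
    val config = new Configuration()
    // pin the web UI to a fixed port (8081 is the usual default)
    config.setInteger(RestOptions.PORT, 8081)
    // create a local environment with the web dashboard enabled
    StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config)
  }
}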
Example 11
Source File: SinkFuncs.scala From piglet with Apache License 2.0
package dbis.piglet.backends.flink.streaming

import org.apache.flink.streaming.api.functions.sink._
import org.apache.flink.configuration.Configuration
import org.zeromq._
import org.zeromq.ZMQ._

class ZmqPublisher(addr: String) extends RichSinkFunction[String] {
  private var publisher: Socket = _
  private val schema = new UTF8StringSchema()

  def initializeConnection = {
    try {
      printf("Initialize Publisher at Socket %s\n", addr)
      val context = ZMQ.context(1)
      publisher = context.socket(ZMQ.PUB)
      publisher.setLinger(5000)
      publisher.setSndHWM(0)
      publisher.bind(addr)
      Thread sleep 1000
    } catch {
      case e: java.io.IOException =>
        throw new RuntimeException(s"Cannot initialize connection to socket $addr")
      case e: Throwable => throw e
    }
  }

  override def invoke(in: String) = {
    val msg: Array[Byte] = schema.serialize(in)
    try {
      printf("Sending: %s\n", in)
      publisher.send(msg, 0)
    } catch {
      case e: java.io.IOException =>
        throw new RuntimeException(s"Cannot send message ${in.toString} to socket $addr")
      case e: Throwable => throw e
    }
  }

  def closeConnection = {
    try {
      Thread sleep 1000
      publisher.close
    } catch {
      case e: java.io.IOException =>
        throw new RuntimeException(s"Error while closing connection with socket $addr")
      case e: Throwable => throw e
    }
  }

  override def open(parameters: Configuration) = {
    initializeConnection
  }

  override def close = {
    closeConnection
  }
}
Example 12
Source File: SourceFuncs.scala From piglet with Apache License 2.0
package dbis.piglet.backends.flink.streaming

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source._
import org.apache.flink.streaming.api.functions.source.SourceFunction._
import org.zeromq._
import org.zeromq.ZMQ._

class ZmqSubscriber(addr: String) extends RichSourceFunction[String] {
  private var subscriber: Socket = _
  @volatile private var isRunning: Boolean = _
  private val schema = new UTF8StringSchema()

  @throws(classOf[Exception])
  override def open(parameters: Configuration) = {
    super.open(parameters)
    val context = ZMQ.context(1)
    subscriber = context.socket(ZMQ.SUB)
    subscriber.setRcvHWM(0)
    subscriber.connect(addr)
    subscriber.subscribe("".getBytes())
    isRunning = true
  }

  @throws(classOf[Exception])
  override def run(ctx: SourceContext[String]) = {
    streamFromSocket(ctx, subscriber)
  }

  @throws(classOf[Exception])
  def streamFromSocket(ctx: SourceContext[String], socket: Socket) = {
    try {
      while (isRunning) {
        try {
          val msg: Array[Byte] = socket.recv(0)
          val element: String = msg match {
            case msg: Array[Byte] => schema.deserialize(msg)
            case _ => new String(msg)
          }
          // val element: List[String] = schema.deserialize(msg)
          ctx.collect(element)
        } catch {
          case e: ZMQException => throw e
        }
      }
    } finally {
      socket.close
    }
  }

  override def cancel() = {
    isRunning = false
    try {
      subscriber.close
    } catch {
      case e: java.io.IOException =>
        throw new Exception(s"Could not close open socket")
    }
  }
}
Example 13
Source File: SinkFunctionExample.scala From examples-scala with Apache License 2.0
package io.github.streamingwithflink.chapter8

import java.io.PrintStream
import java.net.{InetAddress, Socket}

import io.github.streamingwithflink.util.{SensorReading, SensorSource, SensorTimeAssigner}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala._

/**
 * Example program that writes sensor readings to a socket.
 *
 * NOTE: Before starting the program, you need to start a process that listens on a socket at localhost:9191.
 * On Linux, you can do that with nc (netcat) with the following command:
 *
 *   nc -l localhost 9191
 */
object SinkFunctionExample {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // configure watermark interval
    env.getConfig.setAutoWatermarkInterval(1000L)

    // ingest sensor stream
    val readings: DataStream[SensorReading] = env
      // SensorSource generates random temperature readings
      .addSource(new SensorSource)
      // assign timestamps and watermarks which are required for event time
      .assignTimestampsAndWatermarks(new SensorTimeAssigner)

    // write the sensor readings to a socket
    readings.addSink(new SimpleSocketSink("localhost", 9191))
      // set parallelism to 1 because only one thread can write to a socket
      .setParallelism(1)

    env.execute()
  }
}

/**
 * Writes a stream of [[SensorReading]] to a socket.
 */
class SimpleSocketSink(val host: String, val port: Int)
    extends RichSinkFunction[SensorReading] {

  var socket: Socket = _
  var writer: PrintStream = _

  override def open(config: Configuration): Unit = {
    // open socket and writer
    socket = new Socket(InetAddress.getByName(host), port)
    writer = new PrintStream(socket.getOutputStream)
  }

  override def invoke(
      value: SensorReading,
      ctx: SinkFunction.Context[_]): Unit = {
    // write sensor reading to socket
    writer.println(value.toString)
    writer.flush()
  }

  override def close(): Unit = {
    // close writer and socket
    writer.close()
    socket.close()
  }
}
Example 14
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {

  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    //val socketStream = env.socketTextStream("localhost", 9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      })
      .keyBy(0)
      .process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut: Int) extends ProcessFunction[(String, Long), SessionObj] {

  private var state: ValueState[SessionObj] = null

  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(
      new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord: SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)
    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) {
      // emit the state on timeout
      state.clear()
    }
  }
}

case class SessionObj(startTime: Long, latestEndTime: Long, heartbeatCount: Int)
Example 15
Source File: WholeFileInputFormat.scala From flink-tensorflow with Apache License 2.0
package org.apache.flink.contrib.tensorflow.io

import java.io.{EOFException, IOException, InputStream}

import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.configuration.Configuration
import org.apache.flink.core.fs._
import org.apache.flink.util.Preconditions.checkState

// (the abstract class header below is reconstructed so the members parse;
//  the original listing omits it and shows only part of the class)
abstract class WholeFileInputFormat[T] extends FileInputFormat[T] {

  @throws[IOException]
  def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T

  // --------------------------------------------------------------------------------------------
  //  Lifecycle
  // --------------------------------------------------------------------------------------------

  override def nextRecord(reuse: T): T = {
    checkState(!reachedEnd())
    checkState(currentSplit != null && currentSplit.getStart == 0)
    checkState(stream != null)
    readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength)
  }

  override def reachedEnd(): Boolean = {
    stream.getPos != 0
  }
}

@SerialVersionUID(1L)
object WholeFileInputFormat {

  @throws[IOException]
  def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = {
    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val buf = new Array[Byte](fileLength.toInt)
    readFully(fileStream, buf, 0, fileLength.toInt)
    buf
  }

  @throws[IOException]
  def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = {
    var bytesRead = 0
    while (bytesRead < len) {
      val read = inputStream.read(buf, off + bytesRead, len - bytesRead)
      if (read < 0) throw new EOFException("Premature end of stream")
      bytesRead += read
    }
    buf
  }
}
Example 16
Source File: ImageInputFormat.scala From flink-tensorflow with Apache License 2.0
package org.apache.flink.contrib.tensorflow.examples.inception

import java.io.IOException
import java.util.Collections

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.io.GlobFilePathFilter
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow._
import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._
import org.apache.flink.core.fs.{FSDataInputStream, Path}
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow.contrib.scala.ByteStrings._
import resource._

import scala.collection.JavaConverters._

// (excerpt: the ImageInputFormat class declaration and its `model` field are
//  not shown in this listing; only the readRecord override appears)
  override def readRecord(
      reuse: (String, ImageTensorValue),
      filePath: Path,
      fileStream: FSDataInputStream,
      fileLength: Long): (String, ImageTensorValue) = {

    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }

    val imageData =
      readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile]

    val imageTensor: ImageTensorValue =
      managed(imageData.as[ImageFileTensor])
        .flatMap(x => model.normalize(x))
        .acquireAndGet(_.toValue)

    (filePath.getName, imageTensor)
  }
}

object ImageInputFormat {
  def apply(): ImageInputFormat = new ImageInputFormat
}
Example 17
Source File: LeftJoinKeyedCoProcessFunction.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.internal.JoinLineageRecordFactory
import com.amazon.milan.compiler.flink.metrics.MetricFactory
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import com.amazon.milan.types.LineageRecord
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction
import org.apache.flink.util.{Collector, OutputTag}

object LeftJoinCoProcessFunction {
  val LeftInputRecordsCounterMetricName = "left_input_record_count"
  val RightInputRecordsCounterMetricName = "right_input_record_count"
  val OutputRecordsCounterMetricName = "output_record_count"
}

import com.amazon.milan.compiler.flink.runtime.LeftJoinCoProcessFunction._

abstract class LeftJoinKeyedCoProcessFunction[TLeft >: Null, TRight >: Null, TKey >: Null <: Product, TOut >: Null](
    rightTypeInformation: TypeInformation[TRight],
    keyTypeInformation: TypeInformation[TKey],
    outputTypeInformation: TypeInformation[TOut],
    leftRecordIdExtractor: RecordIdExtractor[TLeft],
    rightRecordIdExtractor: RecordIdExtractor[TRight],
    outputRecordIdExtractor: RecordIdExtractor[TOut],
    lineageRecordFactory: JoinLineageRecordFactory,
    lineageOutputTag: OutputTag[LineageRecord],
    metricFactory: MetricFactory)
  extends KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]
    with ResultTypeQueryable[RecordWrapper[TOut, TKey]] {

  @transient private lazy val canProduceLineage =
    leftRecordIdExtractor.canExtractRecordId &&
      rightRecordIdExtractor.canExtractRecordId &&
      outputRecordIdExtractor.canExtractRecordId
  @transient private lazy val leftInputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, LeftInputRecordsCounterMetricName)
  @transient private lazy val rightInputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, RightInputRecordsCounterMetricName)
  @transient private lazy val outputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, OutputRecordsCounterMetricName)

  @transient private var lastRightValue: ValueState[TRight] = _

  protected def map(left: TLeft, right: TRight): TOut

  protected def postCondition(left: TLeft, right: TRight): Boolean

  override def processElement1(leftRecord: RecordWrapper[TLeft, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.leftInputRecordsCounter.increment()

    val leftValue = leftRecord.value
    val rightValue = this.lastRightValue.value()

    if (this.postCondition(leftValue, rightValue)) {
      val output = this.map(leftValue, rightValue)

      if (output != null) {
        if (this.canProduceLineage) {
          val lineageRecord = this.createLineageRecord(this.outputRecordIdExtractor(output), leftValue, rightValue)
          context.output(this.lineageOutputTag, lineageRecord)
        }

        collector.collect(RecordWrapper.wrap[TOut, TKey](output, leftRecord.key, 0))
        this.outputRecordsCounter.increment()
      }
    }
  }

  override def processElement2(rightRecord: RecordWrapper[TRight, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.rightInputRecordsCounter.increment()
    this.lastRightValue.update(rightRecord.value)
  }

  override def open(parameters: Configuration): Unit = {
    val rightValueDescriptor = new ValueStateDescriptor[TRight]("lastRightValue", this.rightTypeInformation)
    this.lastRightValue = this.getRuntimeContext.getState(rightValueDescriptor)
  }

  override def getProducedType: TypeInformation[RecordWrapper[TOut, TKey]] =
    RecordWrapperTypeInformation.wrap(this.outputTypeInformation, this.keyTypeInformation)

  private def createLineageRecord(outputRecordId: String, leftRecord: TLeft, rightRecord: TRight): LineageRecord = {
    val sourceRecords =
      Option(leftRecord).toSeq.map(r => this.lineageRecordFactory.createLeftRecordPointer(this.leftRecordIdExtractor(r))) ++
        Option(rightRecord).toSeq.map(r => this.lineageRecordFactory.createRightRecordPointer(this.rightRecordIdExtractor(r)))

    this.lineageRecordFactory.createLineageRecord(outputRecordId, sourceRecords)
  }
}
Example 18
Source File: TimeWindowFlatMapProcessWindowFunction.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import java.lang
import java.time.Instant

import com.amazon.milan.compiler.flink.TypeUtil
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

abstract class TimeWindowFlatMapProcessWindowFunction[T >: Null, TInKey >: Null <: Product, TOutKey >: Null <: Product](
    recordTypeInfo: TypeInformation[T],
    outKeyTypeInfo: TypeInformation[TOutKey])
  extends ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]
    with ResultTypeQueryable[RecordWrapper[Option[T], TOutKey]] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  protected def addWindowStartTimeToKey(key: TInKey, windowStart: Instant): TOutKey

  override def getProducedType: TypeInformation[RecordWrapper[Option[T], TOutKey]] =
    RecordWrapperTypeInformation.wrap(TypeUtil.createOptionTypeInfo(this.recordTypeInfo), this.outKeyTypeInfo)

  override def process(key: TInKey,
                       context: ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]#Context,
                       items: lang.Iterable[RecordWrapper[Option[T], TInKey]],
                       collector: Collector[RecordWrapper[Option[T], TOutKey]]): Unit = {
    val windowStartTime = Instant.ofEpochMilli(context.window().getStart)

    val record = items.iterator().next()
    val outKey = this.addWindowStartTimeToKey(record.key, windowStartTime)
    val outRecord = RecordWrapper.wrap(record.value, outKey, sequenceNumberHelper.increment())
    collector.collect(outRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
}
Example 19
Source File: AssignSequenceNumberProcessWindowFunctions.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import java.lang

import com.amazon.milan.compiler.flink.types.RecordWrapper
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.windows.Window
import org.apache.flink.util.Collector

class AssignSequenceNumberProcessAllWindowFunction[T >: Null, TKey >: Null <: Product, TWindow <: Window]
  extends ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  override def process(context: ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow]#Context,
                       items: lang.Iterable[RecordWrapper[T, TKey]],
                       collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    val item = items.iterator().next()
    val outputRecord = item.withSequenceNumber(this.sequenceNumberHelper.increment())
    collector.collect(outputRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
}