org.apache.flink.configuration.Configuration Scala Examples
The following examples show how to use org.apache.flink.configuration.Configuration.
Each example is drawn from an open-source project; the source file, project, and license are noted above each listing.
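Before the project-specific examples, a minimal sketch of the Configuration API itself may help: it is essentially a typed key-value map that Flink hands to rich functions through open(...) and accepts when constructing local environments. The sketch below only uses basic setters and getters; the keys (my.app.*) and the object name are made-up illustrations, not built-in Flink options.

import org.apache.flink.configuration.Configuration

object ConfigurationBasics {
  def main(args: Array[String]): Unit = {
    val config = new Configuration()

    // write values under string keys (these keys are arbitrary examples)
    config.setString("my.app.name", "demo")
    config.setInteger("my.app.retries", 3)
    config.setBoolean("my.app.verbose", true)

    // read them back, supplying a default for absent keys
    val name = config.getString("my.app.name", "unknown")
    val retries = config.getInteger("my.app.retries", 1)
    val verbose = config.getBoolean("my.app.verbose", false)

    println(s"name=$name retries=$retries verbose=$verbose")
  }
}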
Example 1
Source File: ConsoleReporterTestJob.scala From flink-stuff with Apache License 2.0
package com.jgrier.flinkstuff.jobs

import com.jgrier.flinkstuff.sources.IntegerSource
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.api.scala._

object ConsoleReporterTestJob {
  def main(args: Array[String]) {
    val config = new Configuration()
    config.setString("metrics.reporters", "consoleReporter")
    config.setString("metrics.reporter.consoleReporter.class", "com.jgrier.flinkstuff.metrics.ConsoleReporter")
    config.setString("metrics.reporter.consoleReporter.interval", "10 SECONDS")

    val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(config))
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new IntegerSource(100))

    stream
      .timeWindowAll(Time.seconds(1))
      .sum(0)
      .print

    env.execute("ConsoleReporterTestJob")
  }
}
Example 2
Source File: RegressITCase.scala From flink-tensorflow with Apache License 2.0
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`, Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
}
Example 3
Source File: StreamingDemo.scala From flink-demos with Apache License 2.0
package com.dataartisans.flink.example.eventpattern

import java.text.SimpleDateFormat
import java.util
import java.util.{Calendar, Properties, UUID}

import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer
import org.apache.flink.api.common.functions.{RuntimeContext, RichFlatMapFunction}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{IndexRequestBuilder, ElasticsearchSink}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.util.Collector
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests

class StateMachineMapper extends RichFlatMapFunction[Event, Alert] {

  private[this] var currentState: ValueState[State] = _

  override def open(config: Configuration): Unit = {
    currentState = getRuntimeContext.getState(
      new ValueStateDescriptor("state", classOf[State], InitialState))
  }

  override def flatMap(t: Event, out: Collector[Alert]): Unit = {
    val state = currentState.value()
    val nextState = state.transition(t.event)

    nextState match {
      case InvalidTransition =>
        out.collect(Alert(t.sourceAddress, state, t.event))
      case x if x.terminal =>
        currentState.clear()
      case x =>
        currentState.update(nextState)
    }
  }
}
Example 4
Source File: TaxiRideProcessor.scala From pipelines-examples with Apache License 2.0
package pipelines.examples
package processor

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.functions.co._
import org.apache.flink.api.common.state.{ ValueState, ValueStateDescriptor }
import org.apache.flink.util.Collector

import pipelines.streamlets.StreamletShape
import pipelines.streamlets.avro._
import pipelines.flink.avro._
import pipelines.flink._

class TaxiRideProcessor extends FlinkStreamlet {

  // Step 1: Define inlets and outlets. Note for the outlet you need to specify
  //         the partitioner function explicitly: here we are using the
  //         rideId as the partitioner
  @transient val inTaxiRide = AvroInlet[TaxiRide]("in-taxiride")
  @transient val inTaxiFare = AvroInlet[TaxiFare]("in-taxifare")
  @transient val out = AvroOutlet[TaxiRideFare]("out", _.rideId.toString)

  // Step 2: Define the shape of the streamlet. In this example the streamlet
  //         has 2 inlets and 1 outlet
  @transient val shape = StreamletShape.withInlets(inTaxiRide, inTaxiFare).withOutlets(out)

  // Step 3: Provide custom implementation of `FlinkStreamletLogic` that defines
  //         the behavior of the streamlet
  override def createLogic() = new FlinkStreamletLogic {
    override def buildExecutionGraph = {
      val rides: DataStream[TaxiRide] =
        readStream(inTaxiRide)
          .filter { ride ⇒ ride.isStart.booleanValue }
          .keyBy("rideId")

      val fares: DataStream[TaxiFare] =
        readStream(inTaxiFare)
          .keyBy("rideId")

      val processed: DataStream[TaxiRideFare] =
        rides
          .connect(fares)
          .flatMap(new EnrichmentFunction)

      writeStream(out, processed)
    }
  }

  import org.apache.flink.configuration.Configuration

  class EnrichmentFunction extends RichCoFlatMapFunction[TaxiRide, TaxiFare, TaxiRideFare] {

    @transient var rideState: ValueState[TaxiRide] = null
    @transient var fareState: ValueState[TaxiFare] = null

    override def open(params: Configuration): Unit = {
      super.open(params)
      rideState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiRide]("saved ride", classOf[TaxiRide]))
      fareState = getRuntimeContext.getState(
        new ValueStateDescriptor[TaxiFare]("saved fare", classOf[TaxiFare]))
    }

    override def flatMap1(ride: TaxiRide, out: Collector[TaxiRideFare]): Unit = {
      val fare = fareState.value
      if (fare != null) {
        fareState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        rideState.update(ride)
      }
    }

    override def flatMap2(fare: TaxiFare, out: Collector[TaxiRideFare]): Unit = {
      val ride = rideState.value
      if (ride != null) {
        rideState.clear()
        out.collect(new TaxiRideFare(ride.rideId, fare.totalFare))
      } else {
        fareState.update(fare)
      }
    }
  }
}
Example 5
Source File: CacheFile.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source

object CacheFile {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }

    // cache table
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)

    env.addSource(source)
      .map(json => {
        val id = json.get("id").asText()
        val phone = json.get("phone").asText()
        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {

        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {
          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray
            context.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          // look up the cached name for this id (empty string if absent)
          val name = cache.getOrElse(value._1, "")
          value._1 + "," + value._2 + "," + name
        }
      })
      .print()

    env.execute("cacheFile")
  }
}
Example 6
Source File: CustomerTimerDemo.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.timer

import java.io.File
import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import java.util
import java.util.{Timer, TimerTask}

import org.apache.flink.api.scala._
import com.venn.common.Common
import com.venn.util.TwoStringSource
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer
import org.slf4j.LoggerFactory

// (excerpt: the enclosing definitions of env, stream, logger, conn, ps, map and
//  the JDBC settings are not shown in this listing)

    def query() = {
      logger.info("query mysql")
      try {
        Class.forName(driverName)
        conn = DriverManager.getConnection(jdbcUrl, username, password)
        ps = conn.prepareStatement("select id,name from venn.timer")
        val rs = ps.executeQuery

        while (!rs.isClosed && rs.next) {
          val id = rs.getString(1)
          val name = rs.getString(2)
          map.put(id, name)
        }
        logger.info("get config from db size : {}", map.size())
      } catch {
        case e @ (_: ClassNotFoundException | _: SQLException) =>
          e.printStackTrace()
      } finally {
        if (conn != null) {
          conn.close()
        }
      }
    }
  })
  // .print()

  val sink = new FlinkKafkaProducer[String]("timer_out",
    new SimpleStringSchema(),
    Common.getProp)
  stream.addSink(sink)

  env.execute(this.getClass.getName)
  }
}
Example 7
Source File: MysqlSink1.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}

import org.apache.flink.api.common.io.OutputFormat
import org.apache.flink.configuration.Configuration
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink1 extends OutputFormat[User] {

  val logger: Logger = LoggerFactory.getLogger("MysqlSink1")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def configure(parameters: Configuration): Unit = {
    // not needed
  }

  override def open(taskNumber: Int, numTasks: Int): Unit = {
    try {
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)
      // disable auto commit
      conn.setAutoCommit(false)
    } catch {
      case e @ (_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error")
        e.printStackTrace()
        System.exit(-1)
    }
  }

  override def writeRecord(user: User): Unit = {
    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)
    ps.execute()
    conn.commit()
  }

  override def close(): Unit = {
    if (conn != null) {
      conn.commit()
      conn.close()
    }
  }
}
Example 8
Source File: MysqlSink.scala From flink-rookie with Apache License 2.0
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink extends RichSinkFunction[User] {

  val logger: Logger = LoggerFactory.getLogger("MysqlSink")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def open(parameters: Configuration): Unit = {
    try {
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)
      // disable auto commit
      conn.setAutoCommit(false)
    } catch {
      case e @ (_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error")
        e.printStackTrace()
        System.exit(-1)
    }
  }

  override def invoke(user: User, context: SinkFunction.Context[_]): Unit = {
    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)
    ps.execute()
    conn.commit()
  }

  override def close(): Unit = {
    if (conn != null) {
      conn.commit()
      conn.close()
    }
  }
}
Example 9
Source File: RangePSLogicWithClose.scala From flink-parameter-server with Apache License 2.0
package hu.sztaki.ilab.ps.server

import hu.sztaki.ilab.ps.{ParameterServer, ParameterServerLogic}
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.configuration.Configuration

class RangePSLogicWithClose[P](featureCount: Int, paramInit: => Int => P, paramUpdate: => (P, P) => P)
  extends ParameterServerLogic[Int, P, (Int, P)] {

  var startIndex = 0
  var params: Array[Option[P]] = _

  @transient lazy val init: (Int) => P = paramInit
  @transient lazy val update: (P, P) => P = paramUpdate

  override def onPullRecv(id: Int, workerPartitionIndex: Int, ps: ParameterServer[Int, P, (Int, P)]): Unit = {
    if (id - startIndex < 0) {
      println(id)
      println(params.mkString("[", ",", "]"))
    }
    ps.answerPull(id, params(id - startIndex) match {
      case Some(e) => e
      case None =>
        val ini = init(id)
        params(id - startIndex) = Some(ini)
        ini
    }, workerPartitionIndex)
  }

  override def onPushRecv(id: Int, deltaUpdate: P, ps: ParameterServer[Int, P, (Int, P)]): Unit = {
    val index = id - startIndex
    val c = params(index) match {
      case Some(q) => update(q, deltaUpdate)
      case None => deltaUpdate
    }
    params(index) = Some(c)
  }

  override def open(parameters: Configuration, runtimeContext: RuntimeContext): Unit = {
    super.open(parameters, runtimeContext)
    val div = Math.ceil(featureCount.toDouble / runtimeContext.getNumberOfParallelSubtasks).toInt
    val mod = featureCount - (runtimeContext.getNumberOfParallelSubtasks - 1) * div
    params = Array.fill[Option[P]](
      if (mod != 0 && runtimeContext.getIndexOfThisSubtask + 1 == runtimeContext.getNumberOfParallelSubtasks) {
        mod
      } else {
        div
      })(None)
    startIndex = runtimeContext.getIndexOfThisSubtask * div
  }
}
Example 10
Source File: DemoStreamEnvironment.scala From flink-streaming-demo with Apache License 2.0
package com.dataartisans.flink_demo.utils

import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment

object DemoStreamEnvironment {

  def env: StreamExecutionEnvironment = {
    val config = new Configuration()
    // start the web dashboard
    config.setBoolean(ConfigConstants.LOCAL_START_WEBSERVER, true)
    // required to start the web dashboard
    config.setString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, "./data/dummyLogFile.txt")

    // create a local stream execution environment
    new LocalStreamEnvironment(config)
  }
}
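The ConfigConstants keys above come from older Flink releases. On more recent versions (roughly 1.5 onwards) the same effect is usually achieved with RestOptions and createLocalEnvironmentWithWebUI; the sketch below assumes such a Flink version, is not part of the original project, and uses an arbitrary object name.

import org.apache.flink.configuration.{Configuration, RestOptions}
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment

object DemoStreamEnvironmentNewer {

  def env: StreamExecutionEnvironment = {
    val config = new Configuration()
    // pin the web UI to a fixed port (8081 is the usual default)
    config.setInteger(RestOptions.PORT, 8081)
    // create a local environment with the web dashboard enabled
    StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(config)
  }
}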
Example 11
Source File: SinkFuncs.scala From piglet with Apache License 2.0
package dbis.piglet.backends.flink.streaming

import org.apache.flink.streaming.api.functions.sink._
import org.apache.flink.configuration.Configuration
import org.zeromq._
import org.zeromq.ZMQ._

class ZmqPublisher(addr: String) extends RichSinkFunction[String] {
  private var publisher: Socket = _
  private val schema = new UTF8StringSchema()

  def initializeConnection = {
    try {
      printf("Initialize Publisher at Socket %s\n", addr)
      val context = ZMQ.context(1)
      publisher = context.socket(ZMQ.PUB)
      publisher.setLinger(5000)
      publisher.setSndHWM(0)
      publisher.bind(addr)
      Thread sleep 1000
    } catch {
      case e: java.io.IOException =>
        throw new RuntimeException(s"Cannot initialize connection to socket $addr")
      case e: Throwable => throw e
    }
  }

  override def invoke(in: String) = {
    val msg: Array[Byte] = schema.serialize(in)
    try {
      printf("Sending: %s\n", in)
      publisher.send(msg, 0)
    } catch {
      case e: java.io.IOException =>
        throw new RuntimeException(s"Cannot send message ${in.toString} to socket $addr")
      case e: Throwable => throw e
    }
  }

  def closeConnection = {
    try {
      Thread sleep 1000
      publisher.close
    } catch {
      case e: java.io.IOException =>
        throw new RuntimeException(s"Error while closing connection with socket $addr")
      case e: Throwable => throw e
    }
  }

  override def open(parameters: Configuration) = {
    initializeConnection
  }

  override def close = {
    closeConnection
  }
}
Example 12
Source File: SourceFuncs.scala From piglet with Apache License 2.0
package dbis.piglet.backends.flink.streaming

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source._
import org.apache.flink.streaming.api.functions.source.SourceFunction._
import org.zeromq._
import org.zeromq.ZMQ._

class ZmqSubscriber(addr: String) extends RichSourceFunction[String] {
  private var subscriber: Socket = _
  @volatile private var isRunning: Boolean = _
  private val schema = new UTF8StringSchema()

  @throws(classOf[Exception])
  override def open(parameters: Configuration) = {
    super.open(parameters)
    val context = ZMQ.context(1)
    subscriber = context.socket(ZMQ.SUB)
    subscriber.setRcvHWM(0)
    subscriber.connect(addr)
    subscriber.subscribe("".getBytes())
    isRunning = true
  }

  @throws(classOf[Exception])
  override def run(ctx: SourceContext[String]) = {
    streamFromSocket(ctx, subscriber)
  }

  @throws(classOf[Exception])
  def streamFromSocket(ctx: SourceContext[String], socket: Socket) = {
    try {
      while (isRunning) {
        try {
          val msg: Array[Byte] = socket.recv(0)
          val element: String = msg match {
            case msg: Array[Byte] => schema.deserialize(msg)
            case _ => new String(msg)
          }
          // val element: List[String] = schema.deserialize(msg)
          ctx.collect(element)
        } catch {
          case e: ZMQException => throw e
        }
      }
    } finally {
      socket.close
    }
  }

  override def cancel() = {
    isRunning = false
    try {
      subscriber.close
    } catch {
      case e: java.io.IOException =>
        throw new Exception(s"Could not close open socket")
    }
  }
}
Example 13
Source File: SinkFunctionExample.scala From examples-scala with Apache License 2.0
package io.github.streamingwithflink.chapter8

import java.io.PrintStream
import java.net.{InetAddress, Socket}

import io.github.streamingwithflink.util.{SensorReading, SensorSource, SensorTimeAssigner}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala._

/**
 * Example program that writes sensor readings to a socket.
 *
 * NOTE: Before starting the program, you need to start a process that listens on a socket at localhost:9191.
 * On Linux, you can do that with nc (netcat) with the following command:
 *
 *   nc -l localhost 9191
 */
object SinkFunctionExample {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // use event time for the application
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // configure watermark interval
    env.getConfig.setAutoWatermarkInterval(1000L)

    // ingest sensor stream
    val readings: DataStream[SensorReading] = env
      // SensorSource generates random temperature readings
      .addSource(new SensorSource)
      // assign timestamps and watermarks which are required for event time
      .assignTimestampsAndWatermarks(new SensorTimeAssigner)

    // write the sensor readings to a socket
    readings.addSink(new SimpleSocketSink("localhost", 9191))
      // set parallelism to 1 because only one thread can write to a socket
      .setParallelism(1)

    env.execute()
  }
}

/**
 * Writes a stream of [[SensorReading]] to a socket.
 */
class SimpleSocketSink(val host: String, val port: Int)
    extends RichSinkFunction[SensorReading] {

  var socket: Socket = _
  var writer: PrintStream = _

  override def open(config: Configuration): Unit = {
    // open socket and writer
    socket = new Socket(InetAddress.getByName(host), port)
    writer = new PrintStream(socket.getOutputStream)
  }

  override def invoke(
      value: SensorReading,
      ctx: SinkFunction.Context[_]): Unit = {
    // write sensor reading to socket
    writer.println(value.toString)
    writer.flush()
  }

  override def close(): Unit = {
    // close writer and socket
    writer.close()
    socket.close()
  }
}
Example 14
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {

  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    //val socketStream = env.socketTextStream("localhost", 9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      })
      .keyBy(0)
      .process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut: Int) extends ProcessFunction[(String, Long), SessionObj] {

  private var state: ValueState[SessionObj] = null

  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(
      new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord: SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)
    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) {
      // emit the state on timeout
      state.clear()
    }
  }
}

case class SessionObj(startTime: Long, latestEndTime: Long, heartbeatCount: Int)
Example 15
Source File: WholeFileInputFormat.scala From flink-tensorflow with Apache License 2.0
package org.apache.flink.contrib.tensorflow.io

import java.io.{EOFException, IOException, InputStream}

import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.configuration.Configuration
import org.apache.flink.core.fs._
import org.apache.flink.util.Preconditions.checkState

// (the abstract class header below is reconstructed so the members parse;
//  the original listing omits it and shows only part of the class)
abstract class WholeFileInputFormat[T] extends FileInputFormat[T] {

  @throws[IOException]
  def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T

  // --------------------------------------------------------------------------------------------
  //  Lifecycle
  // --------------------------------------------------------------------------------------------

  override def nextRecord(reuse: T): T = {
    checkState(!reachedEnd())
    checkState(currentSplit != null && currentSplit.getStart == 0)
    checkState(stream != null)
    readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength)
  }

  override def reachedEnd(): Boolean = {
    stream.getPos != 0
  }
}

@SerialVersionUID(1L)
object WholeFileInputFormat {

  @throws[IOException]
  def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = {
    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val buf = new Array[Byte](fileLength.toInt)
    readFully(fileStream, buf, 0, fileLength.toInt)
    buf
  }

  @throws[IOException]
  def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = {
    var bytesRead = 0
    while (bytesRead < len) {
      val read = inputStream.read(buf, off + bytesRead, len - bytesRead)
      if (read < 0) throw new EOFException("Premature end of stream")
      bytesRead += read
    }
    buf
  }
}
Example 16
Source File: ImageInputFormat.scala From flink-tensorflow with Apache License 2.0
package org.apache.flink.contrib.tensorflow.examples.inception

import java.io.IOException
import java.util.Collections

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.io.GlobFilePathFilter
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow._
import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._
import org.apache.flink.core.fs.{FSDataInputStream, Path}
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow.contrib.scala.ByteStrings._
import resource._

import scala.collection.JavaConverters._

// (excerpt: the ImageInputFormat class declaration and its `model` field are
//  not shown in this listing; only the readRecord override appears)
  override def readRecord(
      reuse: (String, ImageTensorValue),
      filePath: Path,
      fileStream: FSDataInputStream,
      fileLength: Long): (String, ImageTensorValue) = {

    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }

    val imageData =
      readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile]

    val imageTensor: ImageTensorValue =
      managed(imageData.as[ImageFileTensor])
        .flatMap(x => model.normalize(x))
        .acquireAndGet(_.toValue)

    (filePath.getName, imageTensor)
  }
}

object ImageInputFormat {
  def apply(): ImageInputFormat = new ImageInputFormat
}
Example 17
Source File: LeftJoinKeyedCoProcessFunction.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import com.amazon.milan.compiler.flink.internal.JoinLineageRecordFactory
import com.amazon.milan.compiler.flink.metrics.MetricFactory
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import com.amazon.milan.types.LineageRecord
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction
import org.apache.flink.util.{Collector, OutputTag}

object LeftJoinCoProcessFunction {
  val LeftInputRecordsCounterMetricName = "left_input_record_count"
  val RightInputRecordsCounterMetricName = "right_input_record_count"
  val OutputRecordsCounterMetricName = "output_record_count"
}

import com.amazon.milan.compiler.flink.runtime.LeftJoinCoProcessFunction._

abstract class LeftJoinKeyedCoProcessFunction[TLeft >: Null, TRight >: Null, TKey >: Null <: Product, TOut >: Null](
    rightTypeInformation: TypeInformation[TRight],
    keyTypeInformation: TypeInformation[TKey],
    outputTypeInformation: TypeInformation[TOut],
    leftRecordIdExtractor: RecordIdExtractor[TLeft],
    rightRecordIdExtractor: RecordIdExtractor[TRight],
    outputRecordIdExtractor: RecordIdExtractor[TOut],
    lineageRecordFactory: JoinLineageRecordFactory,
    lineageOutputTag: OutputTag[LineageRecord],
    metricFactory: MetricFactory)
  extends KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]
    with ResultTypeQueryable[RecordWrapper[TOut, TKey]] {

  @transient private lazy val canProduceLineage =
    leftRecordIdExtractor.canExtractRecordId &&
      rightRecordIdExtractor.canExtractRecordId &&
      outputRecordIdExtractor.canExtractRecordId
  @transient private lazy val leftInputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, LeftInputRecordsCounterMetricName)
  @transient private lazy val rightInputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, RightInputRecordsCounterMetricName)
  @transient private lazy val outputRecordsCounter =
    this.metricFactory.createCounter(this.getRuntimeContext, OutputRecordsCounterMetricName)

  @transient private var lastRightValue: ValueState[TRight] = _

  protected def map(left: TLeft, right: TRight): TOut

  protected def postCondition(left: TLeft, right: TRight): Boolean

  override def processElement1(leftRecord: RecordWrapper[TLeft, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.leftInputRecordsCounter.increment()

    val leftValue = leftRecord.value
    val rightValue = this.lastRightValue.value()

    if (this.postCondition(leftValue, rightValue)) {
      val output = this.map(leftValue, rightValue)

      if (output != null) {
        if (this.canProduceLineage) {
          val lineageRecord = this.createLineageRecord(this.outputRecordIdExtractor(output), leftValue, rightValue)
          context.output(this.lineageOutputTag, lineageRecord)
        }

        collector.collect(RecordWrapper.wrap[TOut, TKey](output, leftRecord.key, 0))
        this.outputRecordsCounter.increment()
      }
    }
  }

  override def processElement2(rightRecord: RecordWrapper[TRight, TKey],
                               context: KeyedCoProcessFunction[TKey, RecordWrapper[TLeft, TKey], RecordWrapper[TRight, TKey], RecordWrapper[TOut, TKey]]#Context,
                               collector: Collector[RecordWrapper[TOut, TKey]]): Unit = {
    this.rightInputRecordsCounter.increment()
    this.lastRightValue.update(rightRecord.value)
  }

  override def open(parameters: Configuration): Unit = {
    val rightValueDescriptor = new ValueStateDescriptor[TRight]("lastRightValue", this.rightTypeInformation)
    this.lastRightValue = this.getRuntimeContext.getState(rightValueDescriptor)
  }

  override def getProducedType: TypeInformation[RecordWrapper[TOut, TKey]] =
    RecordWrapperTypeInformation.wrap(this.outputTypeInformation, this.keyTypeInformation)

  private def createLineageRecord(outputRecordId: String, leftRecord: TLeft, rightRecord: TRight): LineageRecord = {
    val sourceRecords =
      Option(leftRecord).toSeq.map(r => this.lineageRecordFactory.createLeftRecordPointer(this.leftRecordIdExtractor(r))) ++
        Option(rightRecord).toSeq.map(r => this.lineageRecordFactory.createRightRecordPointer(this.rightRecordIdExtractor(r)))

    this.lineageRecordFactory.createLineageRecord(outputRecordId, sourceRecords)
  }
}
Example 18
Source File: TimeWindowFlatMapProcessWindowFunction.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import java.lang
import java.time.Instant

import com.amazon.milan.compiler.flink.TypeUtil
import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.typeutils.ResultTypeQueryable
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

abstract class TimeWindowFlatMapProcessWindowFunction[T >: Null, TInKey >: Null <: Product, TOutKey >: Null <: Product](
    recordTypeInfo: TypeInformation[T],
    outKeyTypeInfo: TypeInformation[TOutKey])
  extends ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]
    with ResultTypeQueryable[RecordWrapper[Option[T], TOutKey]] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  protected def addWindowStartTimeToKey(key: TInKey, windowStart: Instant): TOutKey

  override def getProducedType: TypeInformation[RecordWrapper[Option[T], TOutKey]] =
    RecordWrapperTypeInformation.wrap(TypeUtil.createOptionTypeInfo(this.recordTypeInfo), this.outKeyTypeInfo)

  override def process(key: TInKey,
                       context: ProcessWindowFunction[RecordWrapper[Option[T], TInKey], RecordWrapper[Option[T], TOutKey], TInKey, TimeWindow]#Context,
                       items: lang.Iterable[RecordWrapper[Option[T], TInKey]],
                       collector: Collector[RecordWrapper[Option[T], TOutKey]]): Unit = {
    val windowStartTime = Instant.ofEpochMilli(context.window().getStart)

    val record = items.iterator().next()
    val outKey = this.addWindowStartTimeToKey(record.key, windowStartTime)
    val outRecord = RecordWrapper.wrap(record.value, outKey, sequenceNumberHelper.increment())
    collector.collect(outRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
}
Example 19
Source File: AssignSequenceNumberProcessWindowFunctions.scala From milan with Apache License 2.0
package com.amazon.milan.compiler.flink.runtime

import java.lang

import com.amazon.milan.compiler.flink.types.RecordWrapper
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.windows.Window
import org.apache.flink.util.Collector

class AssignSequenceNumberProcessAllWindowFunction[T >: Null, TKey >: Null <: Product, TWindow <: Window]
  extends ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow] {

  @transient private var sequenceNumberHelper: SequenceNumberHelper = _

  override def process(context: ProcessAllWindowFunction[RecordWrapper[T, TKey], RecordWrapper[T, TKey], TWindow]#Context,
                       items: lang.Iterable[RecordWrapper[T, TKey]],
                       collector: Collector[RecordWrapper[T, TKey]]): Unit = {
    val item = items.iterator().next()
    val outputRecord = item.withSequenceNumber(this.sequenceNumberHelper.increment())
    collector.collect(outputRecord)
  }

  override def open(parameters: Configuration): Unit = {
    this.sequenceNumberHelper = new SequenceNumberHelper(this.getRuntimeContext)
  }
}