org.apache.flink.streaming.api.scala.StreamExecutionEnvironment Scala Examples
The following examples show how to use org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.
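Before working through the examples, here is a minimal, self-contained sketch of the usage pattern they all share: obtain a StreamExecutionEnvironment, build a pipeline, and call execute(). The element values and job name are made up for illustration.

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object MinimalStreamExecutionEnvironmentExample {
  def main(args: Array[String]): Unit = {
    // obtain the environment (local or cluster, depending on how the job is launched)
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // build a simple pipeline from an in-memory collection
    env.fromElements(1, 2, 3)
      .map(_ * 2)
      .print()

    // nothing runs until execute() is called
    env.execute("minimal-example")
  }
}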
Example 1
Source File: ConsoleReporterTestJob.scala From flink-stuff with Apache License 2.0 | 6 votes |
package com.jgrier.flinkstuff.jobs

import com.jgrier.flinkstuff.sources.IntegerSource
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.api.scala._

object ConsoleReporterTestJob {
  def main(args: Array[String]) {
    val config = new Configuration()
    config.setString("metrics.reporters", "consoleReporter")
    config.setString("metrics.reporter.consoleReporter.class", "com.jgrier.flinkstuff.metrics.ConsoleReporter")
    config.setString("metrics.reporter.consoleReporter.interval", "10 SECONDS")

    val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(config))
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new IntegerSource(100))

    stream
      .timeWindowAll(Time.seconds(1))
      .sum(0)
      .print

    env.execute("ConsoleReporterTestJob")
  }
}
Example 2
Source File: RegressITCase.scala From flink-tensorflow with Apache License 2.0 | 6 votes |
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`, Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
}
Example 3
Source File: ContinueRising.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.cep

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

    val pattern = Pattern.begin[CepDemoEvent]("first")
      .next("second").where(new IterativeCondition[CepDemoEvent] {
        override def filter(currentEvent: CepDemoEvent, context: IterativeCondition.Context[CepDemoEvent]): Boolean = {
          // iterate over the events already matched for "first" and keep the last one
          val firstList = context.getEventsForPattern("first").iterator()
          var lastStart: CepDemoEvent = null
          while (firstList.hasNext) {
            lastStart = firstList.next()
          }
          currentEvent.volume > lastStart.volume
        }
      })
      // always remember to add within, it will reduce the state usage
      .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)
    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // compute the change between the first and second event
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val change = second.volume - first.volume
          out.collect("from : " + first.id + ", to " + second.id + ", change : " + change)
        }
      })

    // for convenience, just print
    result.print()

    env.execute(this.getClass.getName)
  }
}
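The listing above is an excerpt and omits the enclosing object and the construction of the input stream. A minimal sketch of the missing setup, assuming a simple CepDemoEvent case class with only the id and volume fields the conditions use (the real class in flink-rookie may differ):

package com.venn.cep

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

// hypothetical stand-in for the project's CepDemoEvent
case class CepDemoEvent(id: String, volume: Long)

object ContinueRisingSetupSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // a small in-memory stream stands in for the real source
    val input: DataStream[CepDemoEvent] = env.fromElements(
      CepDemoEvent("1", 10),
      CepDemoEvent("2", 20),
      CepDemoEvent("3", 15))

    // ... pattern definition, CEP.pattern(input, pattern), and env.execute as in the excerpt above ...
  }
}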
Example 4
Source File: SourceFunctionExample.scala From examples-scala with Apache License 2.0 | 5 votes |
package io.github.streamingwithflink.chapter8

import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.{FunctionInitializationContext, FunctionSnapshotContext}
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object SourceFunctionExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val numbers: DataStream[Long] = env.addSource(new CountSource)
    numbers.print()

    env.execute()
  }
}

class CountSource extends SourceFunction[Long] {
  var isRunning: Boolean = true

  override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {
    var cnt: Long = -1
    while (isRunning && cnt < Long.MaxValue) {
      // increment cnt
      cnt += 1
      ctx.collect(cnt)
    }
  }

  override def cancel(): Unit = isRunning = false
}

class ReplayableCountSource extends SourceFunction[Long] with CheckpointedFunction {
  var isRunning: Boolean = true
  var cnt: Long = _
  var offsetState: ListState[Long] = _

  override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {
    while (isRunning && cnt < Long.MaxValue) {
      ctx.getCheckpointLock.synchronized {
        // increment cnt
        cnt += 1
        ctx.collect(cnt)
      }
    }
  }

  override def cancel(): Unit = isRunning = false

  override def snapshotState(snapshotCtx: FunctionSnapshotContext): Unit = {
    // remove previous cnt
    offsetState.clear()
    // add current cnt
    offsetState.add(cnt)
  }

  override def initializeState(initCtx: FunctionInitializationContext): Unit = {
    // obtain operator list state to store the current cnt
    val desc = new ListStateDescriptor[Long]("offset", classOf[Long])
    offsetState = initCtx.getOperatorStateStore.getListState(desc)
    // initialize cnt variable from the checkpoint
    val it = offsetState.get()
    cnt = if (null == it || !it.iterator().hasNext) -1L else it.iterator().next()
  }
}
Example 5
Source File: AfterMatchStrategyDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.cep

import java.util

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

    val noSkip = AfterMatchSkipStrategy.noSkip()

    val pattern = Pattern.begin[CepDemoEvent]("first").where(event => {
      event.name.equals("a")
    })
      // .timesOrMore(1)
      .next("second").where(event => {
        event.name.equals("a")
      })
      .next("third").where(event => {
        event.name.equals("b")
      })
    // .notNext()
    // always remember to add within, it will reduce the state usage
    // .within(Time.minutes(5 * 60 * 1000))

    val patternStream = CEP.pattern(input, pattern)
    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // collect the matched events of each pattern step
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val third = events.get("third").get(0)
          out.collect("first : " + first + ", second : " + second + ", third : " + third)
        }
      })

    // for convenience, just print
    result.print()

    env.execute(this.getClass.getName)
  }
}
Example 6
Source File: CacheFile.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source

object CacheFile {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }

    // cache table
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)

    env.addSource(source)
      .map(json => {
        val id = json.get("id").asText()
        val phone = json.get("phone").asText()
        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {
        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {
          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val context = Source.fromFile(file, "utf-8").getLines().toArray
            context.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          val name = cache.get(value._1)
          value._1 + "," + value._2 + "," + name
        }
      })
      .print()

    env.execute("cacheFile")
  }
}
Example 7
Source File: CustomerTimerDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.timer

import java.io.File
import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import java.util
import java.util.{Timer, TimerTask}

import org.apache.flink.api.scala._
import com.venn.common.Common
import com.venn.util.TwoStringSource
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer
import org.slf4j.LoggerFactory

      def query() = {
        logger.info("query mysql")
        try {
          Class.forName(driverName)
          conn = DriverManager.getConnection(jdbcUrl, username, password)
          ps = conn.prepareStatement("select id,name from venn.timer")
          val rs = ps.executeQuery

          while (!rs.isClosed && rs.next) {
            val id = rs.getString(1)
            val name = rs.getString(2)
            map.put(id, name)
          }
          logger.info("get config from db size : {}", map.size())
        } catch {
          case e@(_: ClassNotFoundException | _: SQLException) => e.printStackTrace()
        } finally {
          if (conn != null) {
            conn.close()
          }
        }
      }
    })
    // .print()

    val sink = new FlinkKafkaProducer[String]("timer_out"
      , new SimpleStringSchema()
      , Common.getProp)
    stream.addSink(sink)

    env.execute(this.getClass.getName)
  }
}
Example 8
Source File: StreamingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder}
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object StreamingFileSinkDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    // row format
    val sinkRow = StreamingFileSink
      .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8"))
      .withBucketAssigner(new DayBucketAssigner)
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    // use the custom BulkWriterFactory and DayBucketAssigner
    val sinkBuck = StreamingFileSink
      .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory)
      .withBucketAssigner(new DayBucketAssigner())
      .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour
      .build()

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        // json.get("date") + "-" + json.toString
        json
      })
      .addSink(sinkBuck)

    env.execute("StreamingFileSink")
  }
}
Example 9
Source File: RollingFileSinkDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.scala._

    val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink")
    sink.setBucketer(new DayBasePathBucketer)
    sink.setWriter(new StringWriter[String])
    sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB
    // sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hours
    // sink.setInProgressPrefix("inProcessPre")
    // sink.setPendingPrefix("pendingpre")
    // sink.setPartPrefix("partPre")

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        json.get("date") + "-" + json.toString
      })
      .addSink(sink)

    env.execute("rollingFileSink")
  }
}
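This excerpt starts after the environment and Kafka source have been created. A rough sketch of the missing part, modeled on the StreamingFileSinkDemo above from the same package and reusing the imports already shown (the topic name and checkpoint settings are assumptions):

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      env.setStateBackend(new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true))
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    }

    // timestamps are parsed from the "date" field with this format
    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    // hypothetical topic name; Common.getProp supplies the Kafka properties
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink",
      new JsonNodeDeserializationSchema, Common.getProp)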
Example 10
Source File: ProcessWindowForTrigger.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory

object ProcessWindowDemoForTrigger {
  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
          var count = 0
          elements.iterator.foreach(s => {
            count += 1
          })
          logger.info("this trigger have : {} item", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }
}
Example 11
Source File: MysqlOutputDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.jdbcOutput

import java.io.File

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object MysqlOutputDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val source = new FlinkKafkaConsumer[String]("mysql_output", new SimpleStringSchema, Common.getProp)
    source.setStartFromLatest()
    env.addSource(source)
      .map(li => {
        val tmp = li.split(",")
        new User(tmp(0), tmp(1), tmp(2).toInt, tmp(3))
      })
      // .addSink(new MysqlSink1)
      .writeUsingOutputFormat(new MysqlSink1)

    env.execute("msqlOutput")
  }
}
Example 12
Source File: CurrentDayPvCountWaterMark.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.dayWindow

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import com.venn.source.TumblingEventTimeWindows
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger, CountTrigger}
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}

      .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime)
      .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
      .reduce(new ReduceFunction[Eventx] {
        override def reduce(event1: Eventx, event2: Eventx): Eventx = {
          // println("reduce event : " + event2.toString)
          // val minId: String = if (event1.id.compareTo(event2.id) >= 0) event2.id else event1.id
          // val maxId = if (event1.id.compareTo(event2.id) < 0) event1.id else event2.id
          // val minCreateTime = if (event1.createTime.compareTo(event2.createTime) >= 0) event2.createTime else event1.createTime
          // val maxCreateTime = if (event1.createTime.compareTo(event2.createTime) < 0) event1.createTime else event2.createTime
          // val count = event1.count + event2.count
          // new EventResult(minId, maxId, minCreateTime, maxCreateTime, count)
          new Eventx(event1.id, event2.id, event1.amt + event2.amt)
        }
      })
    // format the output event: connect the min/max id and add the current timestamp
    // .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count))

    stream.print("result : ")

    // execute job
    env.execute("CurrentDayCount")
  }
}
Example 13
Source File: BroadCastDemo.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.broadcast

import java.io.File

import com.venn.common.Common
import com.venn.util.StringUtil
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector

object BroadCastDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    // stream of configuration updates
    val configSource = new FlinkKafkaConsumer[String]("broad_cast_demo", new SimpleStringSchema, Common.getProp)
    // initial configuration, loaded from a file
    var initFilePath = ""
    if ("/".equals(File.separator)) {
      initFilePath = "hdfs:///venn/init_file.txt"
    } else {
      initFilePath = "D:\\idea_out\\broad_cast.txt"
    }
    val init = env.readTextFile(initFilePath)

    val descriptor = new MapStateDescriptor[String, String]("dynamicConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
    val configStream = env.addSource(configSource).union(init).broadcast(descriptor)

    val input = env.addSource(new RadomFunction)
      .connect(configStream)
      .process(new BroadcastProcessFunction[String, String, String] {
        override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = {
          println("new config : " + value)
          val configMap = ctx.getBroadcastState(descriptor)
          // parse the config record and write it into the broadcast state
          val line = value.split(",")
          configMap.put(line(0), line(1))
        }

        override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = {
          // look up the given key in the broadcast state
          val configMap = ctx.getBroadcastState(descriptor)
          // map the three-digit city code to its Chinese name using the broadcast state
          // println(value)
          val line = value.split(",")
          val code = line(0)
          var va = configMap.get(code)
          // codes that cannot be resolved default to 中国(code=xxx)
          if (va == null) {
            va = "中国(code=" + code + ")"
          } else {
            va = va + "(code=" + code + ")"
          }
          out.collect(va + "," + line(1))
        }
      })

    input.print()
    env.execute("BroadCastDemo")
  }
}

class RadomFunction extends SourceFunction[String] {
  var flag = true

  override def cancel(): Unit = {
    flag = false
  }

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
    while (flag) {
      for (i <- 0 to 300) {
        var nu = i.toString
        while (nu.length < 3) {
          nu = "0" + nu
        }
        ctx.collect(nu + "," + StringUtil.getRandomString(5))
        Thread.sleep(2000)
      }
    }
  }
}
Example 14
Source File: FlinkTestUtils.scala From flink-parameter-server with Apache License 2.0 | 5 votes |
package hu.sztaki.ilab.ps.test.utils

import org.apache.flink.runtime.client.JobExecutionException
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object FlinkTestUtils {

  case class SuccessException[T](content: T) extends Exception {
    override def toString: String = s"SuccessException($content)"
  }

  case class NoSuccessExceptionReceived() extends Exception

  def executeWithSuccessCheck[T](env: StreamExecutionEnvironment)(checker: T => Unit): Unit = {
    try {
      env.execute()
      throw NoSuccessExceptionReceived()
    } catch {
      case e: JobExecutionException =>
        val rootCause = Stream.iterate[Throwable](e)(_.getCause()).takeWhile(_ != null).last
        rootCause match {
          case successException: SuccessException[T] =>
            checker(successException.content)
          case otherCause =>
            throw e
        }
      case e: Throwable => throw e
    }
  }
}
Example 15
Source File: FlinkStreamingCEPTest.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.cep.test.flink

import java.io.File

import dbis.piglet.backends.{Record, SchemaClass}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.scalatest._
import org.apache.commons.io.FileUtils
import org.apache.flink.api.scala._
import dbis.piglet.cep.nfa._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.cep.flink.CustomDataStreamMatcher._

import scala.collection.mutable.ArrayBuffer
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows

case class StreamingDoubleRecord(col1: Int, col2: Int) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

object OurStreamingNFA {
  def filter1(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 1
  def filter2(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 2
  def filter3(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 3

  def createNFA = {
    val testNFA: NFAController[StreamingDoubleRecord] = new NFAController()
    val firstState = testNFA.createAndGetStartState("First")
    val secondState = testNFA.createAndGetNormalState("Second")
    val thirdState = testNFA.createAndGetNormalState("Third")
    val finalState = testNFA.createAndGetFinalState("Final")
    val firstEdge = testNFA.createAndGetForwardEdge(filter1)
    val secondEdge = testNFA.createAndGetForwardEdge(filter2)
    val thirdEdge = testNFA.createAndGetForwardEdge(filter3)
    testNFA.createForwardTransition(firstState, firstEdge, secondState)
    testNFA.createForwardTransition(secondState, secondEdge, thirdState)
    testNFA.createForwardTransition(thirdState, thirdEdge, finalState)
    testNFA
  }
}

class FlinkStreamingCEPTest extends FlatSpec with Matchers with BeforeAndAfterEach {
  var resultArray = new ArrayBuffer[StreamingDoubleRecord]

  override def beforeEach() {
    resultArray.clear()
  }

  val sample = Seq(
    StreamingDoubleRecord(1, 1),
    StreamingDoubleRecord(2, 2),
    StreamingDoubleRecord(1, 3),
    StreamingDoubleRecord(2, 4),
    StreamingDoubleRecord(3, 5),
    StreamingDoubleRecord(1, 6),
    StreamingDoubleRecord(2, 7),
    StreamingDoubleRecord(3, 8))

  "Flink Streaming CEP" should "detect the pattern SEQ(A, B, C) with first match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, FirstMatch)
  }

  it should "detect the pattern SEQ(A, B, C) with any match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, AllMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with next match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, NextMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with contiguity match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, ContiguityMatches)
  }
}
Example 16
Source File: SimpleWordCount.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

object SimpleWordCount {

  def main(args: Array[String]) {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val typeOfWindow = args(4)

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // create a stream from Kafka
    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val wordCountStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    // implement word count
    val wordsStream = wordCountStream
      .flatMap(line => line.toUpperCase.split(' '))
      .map(word => (word, 1))
      //.flatMap{_.toUpperCase.split(' ')}
      //.map{ (_,1) }

    val keyValuePair = wordsStream.keyBy(0)

    val countPair = if (typeOfWindow.equals("slidingCount")) {
      // Slide by count: a sliding window of 5 messages that triggers (slides) every 2 messages
      keyValuePair.countWindow(5, 2).sum(1)
    } else if (typeOfWindow.equals("tumbleTime")) {
      // Tumble by time: trigger and slide every 5 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1)
    } else if (typeOfWindow.equals("slidingTime")) {
      // Slide by time: a sliding window of 5 seconds that triggers every 2 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1)
    } else {
      // Tumble by count: trigger every 5 messages
      keyValuePair.countWindow(5).sum(1)
    }

    // print the results
    countPair.print()

    // execute the program
    env.execute("Scala WordCount Example")
  }
}
Example 17
Source File: StreamingSQL.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.wordcount

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.table.api.scala._
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.types.Row

object StreamingSQL {

  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val entityCountStream: DataStream[(String, Int)] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))
      .flatMap(line => line.toUpperCase.split(' '))
      .map(word => (word, 1))

    tableEnv.registerDataStream("myTable2", entityCountStream, 'word, 'frequency)

    val roleUp = tableEnv.sqlQuery("SELECT word, SUM(frequency) FROM myTable2 GROUP BY word")

    val typeInfo = createTypeInformation[(String, Int)]
    val outStream = roleUp.toRetractStream(typeInfo)
    outStream.print()

    env.execute("Scala SQL Example")
  }
}

class CustomSinkFunction() extends SinkFunction[Row] {
  @throws[Exception]
  def invoke(value: Row): Unit = {
    // Do something
    println("-" + value)
  }
}
Example 18
Source File: StreamingSessionExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {
  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //val socketStream = env.socketTextStream("localhost",9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream: DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      }).keyBy(0).process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut: Int)
  extends ProcessFunction[(String, Long), SessionObj] {

  private var state: ValueState[SessionObj] = null

  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord: SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)
    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    ctx.timerService.registerEventTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) {
      // the session timed out: clear its state
      state.clear()
    }
  }
}

case class SessionObj(startTime: Long, latestEndTime: Long, heartbeatCount: Int)
Example 19
Source File: EventTimeHeartBeatExample.scala From flink_training with Apache License 2.0 | 5 votes |
package com.tmalaska.flinktraining.example.eventtime

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.tmalaska.flinktraining.example.session.HeartBeat
import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.scala._
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010

object EventTimeHeartBeatExample {
  def main(args: Array[String]) {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val typeOfWindow = args(4)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // create a stream from Kafka
    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val heartbeatStream: DataStream[HeartBeat] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))
      .map(json => {
        implicit val formats = DefaultFormats
        read[HeartBeat](json)
      })
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[HeartBeat]() {
        override def getCurrentWatermark: Watermark = {
          new Watermark(System.currentTimeMillis() - 10000)
        }

        override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = {
          element.eventTime
        }
      })

    // count heartbeats per entity
    val entityCount = heartbeatStream
      .map(heartBeat => (heartBeat.entityId, 1))

    val keyValuePair = entityCount.keyBy(0)

    val countPair = if (typeOfWindow.equals("slidingCount")) {
      // Slide by count: a sliding window of 5 messages that triggers (slides) every 2 messages
      keyValuePair.countWindow(5, 2).sum(1)
    } else if (typeOfWindow.equals("tumbleTime")) {
      // Tumble by time: trigger and slide every 5 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1)
    } else if (typeOfWindow.equals("slidingTime")) {
      // Slide by time: a sliding window of 5 seconds that triggers every 2 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1)
    } else {
      // Tumble by count: trigger every 5 messages
      keyValuePair.countWindow(5).sum(1)
    }

    // print the results
    countPair.print()

    // execute the program
    env.execute("Scala WordCount Example")
  }
}

class MessageTimestamp extends AssignerWithPeriodicWatermarks[HeartBeat] {
  override def getCurrentWatermark: Watermark = {
    // TODO
    null
  }

  override def extractTimestamp(t: HeartBeat, l: Long): Long = {
    // TODO
    -1
  }
}
Example 20
Source File: WordCountTimeWindowWithSocket.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._

object WordCountTimeWindowWithSocket {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val text = env.socketTextStream("localhost", 9999)
    val counts = text.flatMap { _.toLowerCase.split(" +") filter { _.nonEmpty } }
      .map { (_, 1) }
      .keyBy(0)
      .timeWindow(Time.seconds(5))
      .sum(1)
    counts.print()

    env.execute("Window Stream WordCount")
  }
}
Example 21
Source File: TableKafkaJsonSQL.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.{StreamTableEnvironment, _}
import org.apache.flink.types.Row

  def output(): Unit = {
    // input records:
    // 1001,zhangsan,100
    // 1002,lisi,2000
    // 1003,wangwu,1200

    // retract stream output:
    // (true,100)
    // (false,100)
    // (true,2100)
    // (false,2100)
    // (true,3300)
  }
}
Example 22
Source File: TableKafkaJsonConnector.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.{StreamTableEnvironment, _}
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}
import org.apache.flink.types.Row

  def output(): Unit = {
    // input records:
    // 1001,zhangsan,100
    // 1002,lisi,2000
    // 1003,wangwu,1200

    // retract stream output:
    // (true,100)
    // (false,100)
    // (true,2100)
    // (false,2100)
    // (true,3300)
  }
}
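The body of this example is missing from the listing; only its imports and expected output survive. A rough sketch of what a Kafka + JSON table connector registration looks like with these descriptor imports follows; the topic, table and field names, server address, and query are assumptions, not the project's actual values, and the descriptor API shown is the Flink 1.10-era one.

import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala._
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}
import org.apache.flink.types.Row

object TableKafkaJsonConnectorSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val settings = EnvironmentSettings.newInstance().inStreamingMode().build()
    val tableEnv = StreamTableEnvironment.create(env, settings)

    // register a table backed by a Kafka topic with JSON-encoded records
    tableEnv
      .connect(new Kafka()
        .version("universal")
        .topic("user")                                   // hypothetical topic
        .property("bootstrap.servers", "localhost:9092")
        .startFromLatest())
      .withFormat(new Json().failOnMissingField(false))
      .withSchema(new Schema()
        .field("id", Types.STRING)
        .field("name", Types.STRING)
        .field("balance", Types.INT))
      .inAppendMode()
      .createTemporaryTable("users")                     // older Flink versions use registerTableSource instead

    // a running sum, printed as a retract stream of (isAccumulate, value) pairs
    val result = tableEnv.sqlQuery("SELECT SUM(balance) FROM users")
    result.toRetractStream[Row].print()

    env.execute("TableKafkaJsonConnectorSketch")
  }
}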
Example 23
Source File: TableFlinkStreamingQuery.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.scala._

    fsTableEnv.registerDataStream("users", words, 'id, 'name)

    // toAppendStream
    val selectAll = fsTableEnv.sqlQuery("SELECT name, id FROM users")
    selectAll.toAppendStream[(String, Int)].print()

    // toRetractStream
    val selectGroupByAll = fsTableEnv.sqlQuery("SELECT name, count(1) AS cnt FROM users GROUP BY name")
    selectGroupByAll.toRetractStream[(String, Long)].print()

    // execute
    fsEnv.execute("TableFlinkStreamingQuery")
  }
}
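The excerpt omits the creation of fsEnv, fsTableEnv, and the words stream it registers. A minimal sketch of that setup, reusing the imports above (the planner settings and sample elements are assumptions):

    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    val fsSettings = EnvironmentSettings.newInstance().inStreamingMode().build()
    val fsTableEnv = StreamTableEnvironment.create(fsEnv, fsSettings)

    // (id, name) tuples; the fields 'id and 'name are mapped positionally on registration
    val words: DataStream[(Int, String)] = fsEnv.fromElements((1, "flink"), (2, "spark"), (3, "flink"))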
Example 24
Source File: TransformationIterate.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{ConnectedStreams, StreamExecutionEnvironment}

object TransformationIterate {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream = env.fromElements(3, 1, 2, 1, 5).map { t: Int => t }
    val iterated = dataStream.iterate((input: ConnectedStreams[Int, Int]) => {
      // define two map functions that handle the two inputs of the ConnectedStreams
      val head = input.map(i => (i + 1), s => s)
      (head.filter(_ % 2 == 0), head.filter(_ % 2 != 0))
    }, 1000)

    // sample output:
    // 5> 3
    iterated.print()

    env.execute()
  }
}
Example 25
Source File: TransformationKeyByAndReduceAndAggregations.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala.{KeyedStream, StreamExecutionEnvironment}

object TransformationKeyByAndReduceAndAggregations {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream = env.fromElements((1, 1), (1, 2), (2, 3), (2, 4))

    // use the first field as the partition key
    val keyedStream: KeyedStream[(Int, Int), Tuple] = dataStream.keyBy(0)

    // rolling reduce: sum the second field
    val reduceStream = keyedStream.reduce { (t1, t2) =>
      (t1._1, t1._2 + t2._2)
    }
    reduceStream.print("reduce") // reduce:8> (2,7)  reduce:6> (1,3)

    // sum the second field per key
    val sumStream = keyedStream.sum(1)
    sumStream.print("sum") // sum:8> (2,7)  sum:6> (1,3)

    // rolling minimum of the second field for each key
    val minStream = keyedStream.min(1)
    minStream.print("min")

    // rolling minimum for each key, returning the element that holds the minimum
    val minByStream = keyedStream.minBy(1)
    minByStream.print("minBy")

    env.execute()
  }
}
Example 26
Source File: TransformationConnect.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala.{ConnectedStreams, DataStream, StreamExecutionEnvironment}

object TransformationConnect {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream1: DataStream[(String, Int)] = env.fromElements(("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5))
    val dataStream2: DataStream[Int] = env.fromElements(1, 2, 3, 4, 5)

    val connectdeStream: ConnectedStreams[(String, Int), Int] = dataStream1.connect(dataStream2)

    val resultStream = connectdeStream.map(new CoMapFunction[(String, Int), Int, (Int, String)] {
      override def map1(in1: (String, Int)): (Int, String) = {
        (in1._2, in1._1)
      }

      override def map2(in2: Int): (Int, String) = {
        (in2, "default")
      }
    })

    resultStream.print()

    env.execute()
  }
}
Example 27
Source File: TransformationUnion.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object TransformationUnion {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream1: DataStream[(String, Int)] = env.fromElements(("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5))
    val dataStream2: DataStream[(String, Int)] = env.fromElements(("a", 6), ("b", 7), ("c", 8), ("d", 9), ("e", 10))
    val dataStream3: DataStream[(String, Int)] = env.fromElements(("a", 11), ("b", 12), ("c", 13), ("d", 14), ("e", 15))

    val unionStream = dataStream1.union(dataStream2)
    val unionStream2 = unionStream.union(dataStream3)
    unionStream2.print()

    env.execute()
  }
}
Example 28
Source File: TransformationSplitAndSelect.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, SplitStream, StreamExecutionEnvironment}

object TransformationSplitAndSelect {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream1: DataStream[(String, Int)] = env.fromElements(("a", 3), ("d", 4), ("c", 2), ("c", 5), ("a", 5))

    // tag each element as "even" or "odd" based on its second field
    val splitStream: SplitStream[(String, Int)] = dataStream1
      .split(t => if (t._2 % 2 == 0) Seq("even") else Seq("odd"))

    val evenStream: DataStream[(String, Int)] = splitStream.select("even")
    val oddStream: DataStream[(String, Int)] = splitStream.select("odd")

    evenStream.print()

    env.execute()
  }
}
Example 29
Source File: TransformationFilter.scala From flink-hairless-notes with Apache License 2.0 | 5 votes |
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object TransformationFilter {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._

    val dataStream = env.fromElements(("a", 1), ("b", 2), ("a", 3), ("c", 4))

    // keep only the tuples whose second element is even
    val filter: DataStream[(String, Int)] = dataStream.filter(_._2 % 2 == 0)
    filter.print()

    env.execute()
  }
}
Example 30
Source File: DefaultSaverITCase.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.io import org.apache.flink.contrib.tensorflow.models.savedmodel.DefaultSavedModelLoader import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils} import org.apache.flink.core.fs.Path import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{Matchers, WordSpecLike} import org.tensorflow.{Session, Tensor} import scala.collection.JavaConverters._ @RunWith(classOf[JUnitRunner]) class DefaultSaverITCase extends WordSpecLike with Matchers with FlinkTestBase { override val parallelism = 1 "A DefaultSaver" should { "run the save op" in { val env = StreamExecutionEnvironment.getExecutionEnvironment RegistrationUtils.registerTypes(env.getConfig) val loader = new DefaultSavedModelLoader(new Path("../models/half_plus_two"), "serve") val bundle = loader.load() val saverDef = loader.metagraph.getSaverDef val saver = new DefaultSaver(saverDef) def getA = getVariable(bundle.session(), "a").floatValue() def setA(value: Float) = setVariable(bundle.session(), "a", Tensor.create(value)) val initialA = getA println("Initial value: " + initialA) setA(1.0f) val savePath = tempFolder.newFolder("model-0").getAbsolutePath val path = saver.save(bundle.session(), savePath) val savedA = getA savedA shouldBe (1.0f) println("Saved value: " + getA) setA(2.0f) val updatedA = getA updatedA shouldBe (2.0f) println("Updated value: " + updatedA) saver.restore(bundle.session(), path) val restoredA = getA restoredA shouldBe (savedA) println("Restored value: " + restoredA) } def getVariable(sess: Session, name: String): Tensor = { val result = sess.runner().fetch(name).run().asScala result.head } def setVariable(sess: Session, name: String, value: Tensor): Unit = { sess.runner() .addTarget(s"$name/Assign") .feed(s"$name/initial_value", value) .run() } } }