org.apache.flink.streaming.api.scala.StreamExecutionEnvironment Scala Examples

The following examples show how to use org.apache.flink.streaming.api.scala.StreamExecutionEnvironment. You can go to the original project or source file by following the links above each example.
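Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the typical lifecycle of the class: obtain an environment, define a stream, and call execute().

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object MinimalStreamJob {
  def main(args: Array[String]): Unit = {
    // entry point of every Scala DataStream program
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // a tiny bounded stream, just to have something to run
    env.fromElements(1, 2, 3)
      .map(_ * 2)
      .print()

    // nothing runs until execute() is called
    env.execute("MinimalStreamJob")
  }
}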
Example 1
Source File: ConsoleReporterTestJob.scala    From flink-stuff   with Apache License 2.0
package com.jgrier.flinkstuff.jobs

import com.jgrier.flinkstuff.sources.IntegerSource
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.api.scala._

object ConsoleReporterTestJob {
  def main(args: Array[String]) {
    val config = new Configuration()
    config.setString("metrics.reporters", "consoleReporter")
    config.setString("metrics.reporter.consoleReporter.class", "com.jgrier.flinkstuff.metrics.ConsoleReporter")
    config.setString("metrics.reporter.consoleReporter.interval", "10 SECONDS")

    val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(config))
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new IntegerSource(100))

    stream
      .timeWindowAll(Time.seconds(1))
      .sum(0)
      .print

    env.execute("ConsoleReporterTestJob")
  }
} 
Example 2
Source File: RegressITCase.scala    From flink-tensorflow   with Apache License 2.0
package org.apache.flink.contrib.tensorflow.ml

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.functions.RichFlatMapFunction
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow.ml.signatures.RegressionMethod._
import org.apache.flink.contrib.tensorflow.types.TensorInjections.{message2Tensor, messages2Tensor}
import org.apache.flink.contrib.tensorflow.util.TestData._
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.util.Collector
import org.apache.flink.util.Preconditions.checkState
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala.Arrays._
import org.tensorflow.contrib.scala.Rank._
import org.tensorflow.contrib.scala._
import org.tensorflow.example.Example
import resource._

@RunWith(classOf[JUnitRunner])
class RegressITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  type LabeledExample = (Example, Float)

  def examples(): Seq[LabeledExample] = {
    for (v <- Seq(0.0f -> 2.0f, 1.0f -> 2.5f, 2.0f -> 3.0f, 3.0f -> 3.5f))
      yield (example("x" -> feature(v._1)), v._2)
  }

  "A RegressFunction" should {
    "process elements" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val model = new HalfPlusTwo(new Path("../models/half_plus_two"))

      val outputs = env
        .fromCollection(examples())
        .flatMap(new RichFlatMapFunction[LabeledExample, Float] {
          override def open(parameters: Configuration): Unit = model.open()
          override def close(): Unit = model.close()

          override def flatMap(value: (Example, Float), out: Collector[Float]): Unit = {
            for {
              x <- managed(Seq(value._1).toList.as[Tensor].taggedAs[ExampleTensor])
              y <- model.regress_x_to_y(x)
            } {
              // cast as a 1D tensor to use the available conversion
              val o = y.taggedAs[TypedTensor[`1D`,Float]].as[Array[Float]]
              val actual = o(0)
              checkState(actual == value._2)
              out.collect(actual)
            }
          }
        })
        .print()

      env.execute()
    }
  }
} 
Example 3
Source File: ContinueRising.scala    From flink-rookie   with Apache License 2.0
package com.venn.cep

import java.util

import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


    val pattern = Pattern.begin[CepDemoEvent]("first")
      .next("second").where(new IterativeCondition[CepDemoEvent] {
      override def filter(currentEvent: CepDemoEvent, context: IterativeCondition.Context[CepDemoEvent]): Boolean = {
        // get last event
        val firstList = context.getEventsForPattern("first").iterator()
        var lastStart: CepDemoEvent = null
        // iterate firstList and keep only the last element
        while (firstList.hasNext) {
          lastStart = firstList.next()
        }
        // rising: the current event's volume exceeds the previous match's volume
        currentEvent.volume > lastStart.volume
      }
    })
      // always remember to add within(); it reduces state usage
      .within(Time.milliseconds(5 * 60 * 1000)) // 5 minutes

    val patternStream = CEP.pattern(input, pattern)

    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // get the change
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val change = second.volume - first.volume
          out.collect("from : " + first.id + ", to " + second.id + ", change : " + change)
        }

      })

    // for convenience, just print
    result.print()
    env.execute(this.getClass.getName)
  }


} 
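Example 3 is truncated: the enclosing object, the execution environment, and the input stream are missing. A minimal sketch of the assumed setup is shown below; the CepDemoEvent fields and the sample values are assumptions inferred from the pattern code.

// Hypothetical event type and setup for the truncated ContinueRising snippet.
case class CepDemoEvent(id: String, name: String, volume: Long)

object ContinueRising {
  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // a small in-memory input; the original presumably reads from Kafka
    val input: DataStream[CepDemoEvent] = env.fromElements(
      CepDemoEvent("1", "a", 10),
      CepDemoEvent("2", "a", 20),
      CepDemoEvent("3", "b", 30))

    // ... pattern definition, CEP.pattern(input, pattern), and the rest as in the snippet above
  }
}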
Example 4
Source File: SourceFunctionExample.scala    From examples-scala   with Apache License 2.0
package io.github.streamingwithflink.chapter8

import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.{FunctionInitializationContext, FunctionSnapshotContext}
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object SourceFunctionExample {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val numbers: DataStream[Long] = env.addSource(new CountSource)
    numbers.print()

    env.execute()
  }

}

class CountSource extends SourceFunction[Long] {

  var isRunning: Boolean = true

  override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {

    var cnt: Long = -1
    while (isRunning && cnt < Long.MaxValue) {
      // increment cnt
      cnt += 1
      ctx.collect(cnt)
    }

  }

  override def cancel(): Unit = isRunning = false
}

class ReplayableCountSource extends SourceFunction[Long] with CheckpointedFunction {

  var isRunning: Boolean = true
  var cnt: Long = _
  var offsetState: ListState[Long] = _

  override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {

    while (isRunning && cnt < Long.MaxValue) {
      ctx.getCheckpointLock.synchronized {
        // increment cnt
        cnt += 1
        ctx.collect(cnt)
      }
    }
  }

  override def cancel(): Unit = isRunning = false

  override def snapshotState(snapshotCtx: FunctionSnapshotContext): Unit = {
    // remove previous cnt
    offsetState.clear()
    // add current cnt
    offsetState.add(cnt)
  }

  override def initializeState(initCtx: FunctionInitializationContext): Unit = {
    // obtain operator list state to store the current cnt
    val desc = new ListStateDescriptor[Long]("offset", classOf[Long])
    offsetState = initCtx.getOperatorStateStore.getListState(desc)

    // initialize cnt variable from the checkpoint
    val it = offsetState.get()
    cnt = if (null == it || !it.iterator().hasNext) {
      -1L
    } else {
      it.iterator().next()
    }
  }
} 
Example 5
Source File: AfterMatchStrategyDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.cep

import java.util

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy
import org.apache.flink.cep.pattern.conditions.IterativeCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


    val noSkip = AfterMatchSkipStrategy.noSkip()
    val pattern = Pattern.begin[CepDemoEvent]("first").where(event => {
      event.name.equals("a")
    })
      //      .timesOrMore(1)
      .next("second").where(event => {
      event.name.equals("a")
    })
      .next("third").where(event => {
      event.name.equals("b")
    })
//      .notNext()

    // always remember to add within(); it reduces state usage
    //      .within(Time.milliseconds(5 * 60 * 1000)) // 5 minutes

    val patternStream = CEP.pattern(input, pattern)

    val result: DataStream[String] = patternStream.process(
      new PatternProcessFunction[CepDemoEvent, String]() {
        override def processMatch(
                                   events: util.Map[String, util.List[CepDemoEvent]],
                                   ctx: PatternProcessFunction.Context,
                                   out: Collector[String]): Unit = {
          // get the change
          val first = events.get("first").get(0)
          val second = events.get("second").get(0)
          val third = events.get("third").get(0)
          out.collect("first : " + first + ", first " + second + ", third : " + third)
        }

      })

    // for convenience, just print
    result.print()
    env.execute(this.getClass.getName)
  }


} 
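In Example 5 the noSkip strategy is created but never attached to the pattern. In Flink CEP the skip strategy is passed when the pattern sequence is begun; the sketch below shows the assumed intent, reusing the hypothetical CepDemoEvent type from above.

// Attach the AfterMatchSkipStrategy when the pattern sequence is started.
val noSkip = AfterMatchSkipStrategy.noSkip()
val pattern = Pattern.begin[CepDemoEvent]("first", noSkip)
  .where(event => event.name.equals("a"))
  .next("second").where(event => event.name.equals("a"))
  .next("third").where(event => event.name.equals("b"))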
Example 6
Source File: CacheFile.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.tableJoin

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.scala._
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

import scala.io.Source


object CacheFile {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
      env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt")
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
      // file and register name
      env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt")
    }
    // cache table


    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp)


    env.addSource(source)
      .map(json => {

        val id = json.get("id").asText()
        val phone = json.get("phone").asText()

        Tuple2(id, phone)
      })
      .map(new RichMapFunction[(String, String), String] {

        var cache = Map("" -> "")

        override def open(parameters: Configuration): Unit = {

          // read cache file
          val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt")
          if (file.canRead) {
            val lines = Source.fromFile(file, "utf-8").getLines().toArray

            lines.foreach(line => {
              val tmp = line.split(",")
              cache += (tmp(0) -> tmp(1))
            })
          }
        }

        override def map(value: (String, String)): String = {
          // look up the cached name for this id (empty if missing)
          val name = cache.getOrElse(value._1, "")
          value._1 + "," + value._2 + "," + name
        }

      })
      .print()

    env.execute("cacheFile")

  }

} 
Example 7
Source File: CustomerTimerDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.timer

import java.io.File
import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import java.util
import java.util.{Timer, TimerTask}
import org.apache.flink.api.scala._
import com.venn.common.Common
import com.venn.util.TwoStringSource
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.configuration.Configuration
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer
import org.slf4j.LoggerFactory


      def query() = {
        logger.info("query mysql")
        try {
          Class.forName(driverName)
          conn = DriverManager.getConnection(jdbcUrl, username, password)
          ps = conn.prepareStatement("select id,name from venn.timer")
          val rs = ps.executeQuery

          while (!rs.isClosed && rs.next) {
            val id = rs.getString(1)
            val name = rs.getString(2)
            map.put(id, name)
          }
          logger.info("get config from db size : {}", map.size())

        } catch {
          case e@(_: ClassNotFoundException | _: SQLException) =>
            e.printStackTrace()
        } finally {
          if (conn != null) {
            conn.close()
          }
        }
      }
    })
//              .print()


    val sink = new FlinkKafkaProducer[String]("timer_out"
      , new SimpleStringSchema()
      , Common.getProp)
    stream.addSink(sink)
    env.execute(this.getClass.getName)

  }

} 
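Example 7 is also truncated: query() above lives inside a map operator that keeps a MySQL-backed config cache. The sketch below reconstructs the assumed surrounding structure; the class layout, connection settings, and refresh interval are assumptions based only on the imports shown above.

// Hypothetical reconstruction of the elided wrapper around query():
// a RichMapFunction whose open() schedules a java.util.Timer to refresh the cache.
val env = StreamExecutionEnvironment.getExecutionEnvironment
val stream = env.addSource(new TwoStringSource)
  .map(new RichMapFunction[String, String] {
    @transient lazy val logger = LoggerFactory.getLogger(this.getClass)
    val map = new util.HashMap[String, String]()
    var conn: Connection = _
    var ps: PreparedStatement = _
    // assumed connection settings
    val driverName = "com.mysql.jdbc.Driver"
    val jdbcUrl = "jdbc:mysql://localhost:3306/venn"
    val username = "..."
    val password = "..."

    override def open(parameters: Configuration): Unit = {
      // reload the config table once a minute
      new Timer(true).schedule(new TimerTask {
        override def run(): Unit = query()
      }, 0, 60 * 1000)
    }

    override def map(value: String): String = {
      // enrich each record with the cached config value for its key
      val key = value.split(",")(0)
      value + "," + map.getOrDefault(key, "unknown")
    }

    def query(): Unit = {
      // ... body as in the snippet above
    }
  })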
Example 8
Source File: StreamingFileSinkDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder}
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object StreamingFileSinkDemo {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)
    // row format
    val sinkRow = StreamingFileSink
      .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8"))
      .withBucketAssigner(new DayBucketAssigner)
      .withBucketCheckInterval(60 * 60 * 1000L) // 1 hour
      .build()

    // bulk format: use the user-defined BulkWriterFactory and DayBucketAssigner
    val sinkBuck = StreamingFileSink
      .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory)
      .withBucketAssigner(new DayBucketAssigner())
      .withBucketCheckInterval(60 * 60 * 1000L) // 1 hour
      .build()


    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
//        json.get("date") + "-" + json.toString
        json
      })
      .addSink(sinkBuck)

    env.execute("StreamingFileSink")
  }

} 
Example 9
Source File: RollingFileSinkDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.filesink

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.fs.StringWriter
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.scala._


    val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink")
    sink.setBucketer(new DayBasePathBucketer)
    sink.setWriter(new StringWriter[String])
    sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB
    //    sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hours
//    sink.setInProgressPrefix("inProcessPre")
//    sink.setPendingPrefix("pendingpre")
//    sink.setPartPrefix("partPre")

    env.addSource(source)
      .assignAscendingTimestamps(json => {
        sdf.parse(json.get("date").asText()).getTime
      })
      .map(json => {
        json.get("date") + "-" + json.toString
      })
      .addSink(sink)

    env.execute("rollingFileSink")
  }

} 
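Example 9 is truncated as well: the enclosing object, the environment, and the Kafka source are missing. Judging from Example 8 in the same repository, the setup presumably looks like the sketch below (the topic name is an assumption).

object RollingFileSinkDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val sdf = new SimpleDateFormat("yyyyMMddHHmmss")
    val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp)

    // ... the BucketingSink definition and job wiring from the snippet above go here
  }
}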
Example 10
Source File: ProcessWindowForTrigger.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.trigger

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
import org.slf4j.LoggerFactory


object ProcessWindowDemoForTrigger {
  val logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    if ("\\".equals(File.pathSeparator)) {
      val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR)
      env.setStateBackend(rock)
      // checkpoint interval
      env.enableCheckpointing(10000)
    }

    val topic = "current_day"
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

    val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp)
    val stream = env.addSource(kafkaSource)
      .map(s => {
        s
      })
      .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60)))
      .trigger(CountAndTimeTrigger.of(10, Time.seconds(10)))
      .process(new ProcessAllWindowFunction[String, String, TimeWindow] {
        override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
          // count the elements in the pane that fired
          val count = elements.size
          logger.info("this trigger fired with {} items", count)
        }
      })

    // execute job
    env.execute(this.getClass.getName)
  }

} 
Example 11
Source File: MysqlOutputDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.jdbcOutput

import java.io.File

import com.venn.common.Common
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector


object MysqlOutputDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }

    val source = new FlinkKafkaConsumer[String]("mysql_output", new SimpleStringSchema, Common.getProp)
    source.setStartFromLatest()
    env.addSource(source)
        .map(li => {
          val tmp = li.split(",")
          new User(tmp(0), tmp(1), tmp(2).toInt, tmp(3))
        })
//        .addSink(new MysqlSink1)
      .writeUsingOutputFormat(new MysqlSink1)

    env.execute("msqlOutput")
  }

} 
Example 12
Source File: CurrentDayPvCountWaterMark.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.dayWindow

import java.io.File
import java.text.SimpleDateFormat

import com.venn.common.Common
import com.venn.source.TumblingEventTimeWindows
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.formats.json.JsonNodeDeserializationSchema
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger, CountTrigger}
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}


      .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime)
      .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
      .reduce(new ReduceFunction[Eventx] {
        override def reduce(event1: Eventx, event2: Eventx): Eventx = {

//          println("reduce event : " +  event2.toString)
          //            val minId:String = if (event1.id.compareTo(event2.id) >= 0 ) event2.id else event1.id
          //            val maxId = if (event1.id.compareTo(event2.id) < 0 ) event1.id else event2.id
          //            val minCreateTime = if ( event1.createTime.compareTo(event2.createTime) >= 0 ) event2.createTime else event1.createTime
          //            val maxCreateTime = if ( event1.createTime.compareTo(event2.createTime) < 0 ) event1.createTime else event2.createTime
          //            val count = event1.count + event2.count
          //            new EventResult(minId, maxId, minCreateTime, maxCreateTime, count)
          new Eventx(event1.id , event2.id , event1.amt + event2.amt)
        }
      })
      // format the output event: concatenate min and max id, add the current timestamp
//      .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count))
    stream.print("result : ")
    // execute job
    env.execute("CurrentDayCount")
  }

} 
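Example 12 is truncated: the environment, the Kafka source, and the Eventx type are missing. A minimal sketch of the assumed head of the pipeline follows; the Eventx fields, the topic name, and the JSON field names are assumptions.

// Hypothetical event type and pipeline head for the truncated snippet above.
case class Eventx(id: String, createTime: String, amt: Double)

val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")

val source = new FlinkKafkaConsumer[ObjectNode]("current_day", new JsonNodeDeserializationSchema, Common.getProp)
val stream = env.addSource(source)
  .map(json => Eventx(json.get("id").asText(), json.get("createTime").asText(), json.get("amt").asDouble()))
  // ... followed by assignAscendingTimestamps, windowAll, and reduce as in the snippet above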
Example 13
Source File: BroadCastDemo.scala    From flink-rookie   with Apache License 2.0
package com.venn.stream.api.broadcast

import java.io.File

import com.venn.common.Common
import com.venn.util.StringUtil
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector


object BroadCastDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    if ("/".equals(File.separator)) {
      val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true)
      env.setStateBackend(backend)
      env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE)
    } else {
      env.setMaxParallelism(1)
      env.setParallelism(1)
    }
    // config update stream (from Kafka)
    val configSource = new FlinkKafkaConsumer[String]("broad_cast_demo", new SimpleStringSchema, Common.getProp)
    // initialize the config stream; this can be done by reading a config file
    var initFilePath = ""
    if ("/".equals(File.separator)){
      initFilePath = "hdfs:///venn/init_file.txt"
    }else{
      initFilePath = "D:\\idea_out\\broad_cast.txt"
    }
    val init = env.readTextFile(initFilePath)
    val descriptor = new MapStateDescriptor[String,  String]("dynamicConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
    val configStream = env.addSource(configSource).union(init).broadcast(descriptor)


    val input = env.addSource(new RadomFunction)
      .connect(configStream)
      .process(new BroadcastProcessFunction[String, String, String] {
        override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = {

          println("new config : " + value)
          val configMap = ctx.getBroadcastState(descriptor)
          // process the config update: parse it and write it into the broadcast state
          val line = value.split(",")
          configMap.put(line(0), line(1))
        }
        override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = {
          // use give key, return value
          val configMap = ctx.getBroadcastState(descriptor)
          // parse the three-digit city code and translate it to the corresponding Chinese city name via the broadcast state map
//          println(value)
          val line = value.split(",")
          val code = line(0)
          var va = configMap.get(code)
          // codes that cannot be translated default to 中国(code=xxx)
          if ( va == null){
            va = "中国(code="+code+")";
          }else{
            va = va + "(code="+code+")"
          }
          out.collect(va + "," + line(1))
        }
      })
    input.print()

    env.execute("BroadCastDemo")
  }
}

class RadomFunction extends SourceFunction[String]{
  var flag = true
  override def cancel(): Unit = {
    flag = false
  }

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
    while (flag){
      for (i <- 0 to 300) {
        var nu = i.toString
        while (nu.length < 3) {
          nu = "0" + nu
        }
        ctx.collect(nu + "," + StringUtil.getRandomString(5))
        Thread.sleep(2000)
      }
    }
  }
} 
Example 14
Source File: FlinkTestUtils.scala    From flink-parameter-server   with Apache License 2.0
package hu.sztaki.ilab.ps.test.utils

import org.apache.flink.runtime.client.JobExecutionException
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object FlinkTestUtils {

  case class SuccessException[T](content: T) extends Exception {
    override def toString: String = s"SuccessException($content)"
  }

  case class NoSuccessExceptionReceived() extends Exception

  def executeWithSuccessCheck[T](env: StreamExecutionEnvironment)(checker: T => Unit): Unit = {
    try {
      env.execute()
      throw NoSuccessExceptionReceived()
    } catch {
      case e: JobExecutionException =>
        val rootCause = Stream.iterate[Throwable](e)(_.getCause()).takeWhile(_ != null).last
        rootCause match {
          case successException: SuccessException[T] =>
            checker(successException.content)
          case otherCause =>
            throw e
        }
      case e: Throwable => throw e
    }
  }
} 
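A typical usage of this helper (a sketch, not taken from the original project): a test sink throws SuccessException once it has seen all expected records, which fails the job on purpose, and executeWithSuccessCheck unwraps the exception and hands its content to the checker.

// Sketch: deliberately fail the job with SuccessException and verify its payload.
import hu.sztaki.ilab.ps.test.utils.FlinkTestUtils._
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)

env.fromElements(1, 2, 3)
  .map(_ * 2)
  .addSink(new SinkFunction[Int] {
    var seen: List[Int] = Nil
    override def invoke(value: Int): Unit = {
      seen = value :: seen
      // once everything has arrived, abort the job with the result as payload
      if (seen.size == 3) throw SuccessException(seen.sum)
    }
  })

executeWithSuccessCheck[Int](env) { sum =>
  assert(sum == 12)
}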
Example 15
Source File: FlinkStreamingCEPTest.scala    From piglet   with Apache License 2.0
package dbis.cep.test.flink

import java.io.File

import dbis.piglet.backends.{ Record, SchemaClass }
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.scalatest._
import org.apache.commons.io.FileUtils
import org.apache.flink.api.scala._
import dbis.piglet.cep.nfa._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.cep.flink.CustomDataStreamMatcher._
import scala.collection.mutable.ArrayBuffer
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows

case class StreamingDoubleRecord(col1: Int, col2: Int) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

object OurStreamingNFA {
    def filter1(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 1
    def filter2(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 2
    def filter3(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 3
    def createNFA = {
      val testNFA: NFAController[StreamingDoubleRecord] = new NFAController()
      val firstState = testNFA.createAndGetStartState("First")
      val secondState = testNFA.createAndGetNormalState("Second")
      val thirdState = testNFA.createAndGetNormalState("Third")
      val finalState = testNFA.createAndGetFinalState("Final")

      val firstEdge = testNFA.createAndGetForwardEdge(filter1)
      val secondEdge = testNFA.createAndGetForwardEdge(filter2)
      val thirdEdge = testNFA.createAndGetForwardEdge(filter3)

      testNFA.createForwardTransition(firstState, firstEdge, secondState)
      testNFA.createForwardTransition(secondState, secondEdge, thirdState)
      testNFA.createForwardTransition(thirdState, thirdEdge, finalState)
      testNFA
    }
  }

class FlinkStreamingCEPTest extends FlatSpec with Matchers with BeforeAndAfterEach {
  var resultArray = new ArrayBuffer[StreamingDoubleRecord]
  override def beforeEach() {
     resultArray.clear()
  }

  val sample = Seq(
      StreamingDoubleRecord(1,1), 
      StreamingDoubleRecord(2,2), 
      StreamingDoubleRecord(1,3), 
      StreamingDoubleRecord(2,4), 
      StreamingDoubleRecord(3,5), 
      StreamingDoubleRecord(1,6),
      StreamingDoubleRecord(2,7),
      StreamingDoubleRecord(3,8))
      
  "Flink Streaming CEP" should "detect the pattern SEQ(A, B, C) with first match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, FirstMatch)
  }

  it should "detect the pattern SEQ(A, B, C) with any match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, AllMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with next match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, NextMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with contiguity match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, ContiguityMatches)
  }
} 
Example 16
Source File: SimpleWordCount.scala    From flink_training   with Apache License 2.0
package com.tmalaska.flinktraining.example.wordcount

import java.util.Properties
import java.util.concurrent.TimeUnit

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time


object SimpleWordCount {
  def main(args: Array[String]) {

    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val typeOfWindow = args(4)

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // create a stream from Kafka

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val wordCountStream:DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    // implement word count
    val wordsStream = wordCountStream
      .flatMap(line => line.toUpperCase.split(' '))
      .map(word => (word, 1))
      //.flatMap{_.toUpperCase.split(' ')}
      //.map{ (_,1) }

    val keyValuePair = wordsStream.keyBy(0)

    val countPair = if (typeOfWindow.equals("slidingCount")) {
      //Slide by count.  Have a sliding window of 5 messages and trigger or slide 2 messages
      keyValuePair.countWindow(5, 2).sum(1)
    } else if (typeOfWindow.equals("tumbleTime")) {
      //Tumble by time.  Trigger and Slide by 5 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1)
    } else if (typeOfWindow.equals("slidingTime")) {
      //Slide by time.  Have a sliding window of 5 seconds that triggers every 2 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1)
    } else {
      //Tumble by count.  Trigger every 5 messages
      keyValuePair.countWindow(5).sum(1)
    }

    // print the results

    countPair.print()

    // execute the program

    env.execute("Scala WordCount Example")

  }
} 
Example 17
Source File: StreamingSQL.scala    From flink_training   with Apache License 2.0
package com.tmalaska.flinktraining.example.wordcount

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.table.api.scala._
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.types.Row


object StreamingSQL {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val entityCountStream:DataStream[(String, Int)] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))
      .flatMap(line => line.toUpperCase.split(' '))
      .map(word => (word, 1))

    tableEnv.registerDataStream("myTable2", entityCountStream, 'word, 'frequency)


    val rollUp = tableEnv.sqlQuery("SELECT word, SUM(frequency) FROM myTable2 GROUP BY word")

    val typeInfo = createTypeInformation[(String, Int)]
    val outStream = rollUp.toRetractStream(typeInfo)
    outStream.print()
    env.execute("Scala SQL Example")

  }
}

class CustomSinkFunction() extends SinkFunction[Row] {
  @throws[Exception]
  def invoke(value: Row): Unit = {
    //Do something
    println("-" + value)

  }
} 
Example 18
Source File: StreamingSessionExample.scala    From flink_training   with Apache License 2.0
package com.tmalaska.flinktraining.example.session

import java.util.Properties

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._

object StreamingSessionExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val sessionTimeOut = args(4).toInt

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    //val socketStream = env.socketTextStream("localhost",9999, '\n')

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val messageStream:DataStream[String] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))

    val heartBeatStream = messageStream
      .map(str => {
        implicit val formats = DefaultFormats
        println("str:" + str)
        val hb = read[HeartBeat](str)
        (hb.entityId, hb.eventTime)
      }).keyBy(0).process(new MyProcessFunction(sessionTimeOut))

    heartBeatStream.map(session => {
      println("session:" + session)
      session
    })

    heartBeatStream.print()

    env.execute()
  }
}

class MyProcessFunction(sessionTimeOut:Int) extends ProcessFunction[(String,Long), SessionObj] {


  private var state:ValueState[SessionObj] = null


  override def open(parameters: Configuration): Unit = {
    state = getRuntimeContext.getState(new ValueStateDescriptor[SessionObj]("myState", classOf[SessionObj]))
  }

  override def processElement(value: (String, Long),
                              ctx: ProcessFunction[(String, Long), SessionObj]#Context,
                              out: Collector[SessionObj]): Unit = {
    val currentSession = state.value()
    var outBoundSessionRecord:SessionObj = null
    if (currentSession == null) {
      outBoundSessionRecord = SessionObj(value._2, value._2, 1)
    } else {
      outBoundSessionRecord = SessionObj(currentSession.startTime, value._2, currentSession.heartbeatCount + 1)

    }
    state.update(outBoundSessionRecord)
    out.collect(outBoundSessionRecord)
    // the timeout check below uses wall-clock time, so register a processing-time timer
    ctx.timerService.registerProcessingTimeTimer(System.currentTimeMillis() + sessionTimeOut)
  }

  override def onTimer(timestamp: Long,
                       ctx: ProcessFunction[(String, Long), SessionObj]#OnTimerContext,
                       out: Collector[SessionObj]): Unit = {
    val result = state.value
    if (result != null && result.latestEndTime + sessionTimeOut < System.currentTimeMillis()) { // emit the state on timeout
      state.clear()
    }
  }
}

case class SessionObj(startTime:Long, latestEndTime:Long, heartbeatCount:Int) 
Example 19
Source File: EventTimeHeartBeatExample.scala    From flink_training   with Apache License 2.0
package com.tmalaska.flinktraining.example.eventtime

import java.util.Properties
import java.util.concurrent.TimeUnit

import com.tmalaska.flinktraining.example.session.HeartBeat
import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.flink.api.scala._
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010

object EventTimeHeartBeatExample {
  def main(args: Array[String]) {

    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val kafkaTopic = args(2)
    val groupId = args(3)
    val typeOfWindow = args(4)

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // create a stream from Kafka

    val properties = new Properties
    properties.setProperty("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", groupId)

    println("kafkaTopic:" + kafkaTopic)

    val heartbeatStream:DataStream[HeartBeat] = env.addSource(
      new FlinkKafkaConsumer010(kafkaTopic, new SimpleStringSchema(), properties))
      .map(json => {
        implicit val formats = DefaultFormats
        read[HeartBeat](json)
      })
      .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[HeartBeat]() {
        override def getCurrentWatermark: Watermark = {
          new Watermark(System.currentTimeMillis() - 10000)
        }

        override def extractTimestamp(element: HeartBeat, previousElementTimestamp: Long): Long = {
          element.eventTime
        }
      })

    // implement word count
    val entityCount = heartbeatStream
      .map(heartBeat => (heartBeat.entityId, 1))

    val keyValuePair = entityCount.keyBy(0)

    val countPair = if (typeOfWindow.equals("slidingCount")) {
      //Slide by count.  Have a sliding window of 5 messages and trigger or slide 2 messages
      keyValuePair.countWindow(5, 2).sum(1)
    } else if (typeOfWindow.equals("tumbleTime")) {
      //Tumble by time.  Trigger and Slide by 5 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS)).sum(1)
    } else if (typeOfWindow.equals("slidingTime")) {
      //Slide by time.  Have a sliding window of 5 seconds that triggers every 2 seconds
      keyValuePair.timeWindow(Time.of(5, TimeUnit.SECONDS), Time.of(2, TimeUnit.SECONDS)).sum(1)
    } else {
      //Tumble by count.  Trigger every 5 messages
      keyValuePair.countWindow(5).sum(1)
    }

    // print the results

    countPair.print()

    // execute the program

    env.execute("Scala WordCount Example")

  }
}


class MessageTimestamp extends AssignerWithPeriodicWatermarks[HeartBeat] {
  override def getCurrentWatermark: Watermark = {
    //TODO
    null
  }

  override def extractTimestamp(t: HeartBeat, l: Long): Long = {
    //TODO
    -1
  }
} 
Example 20
Source File: WordCountTimeWindowWithSocket.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._

object WordCountTimeWindowWithSocket {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val text = env.socketTextStream("localhost", 9999)
    val counts = text.flatMap { _.toLowerCase.split(" +") filter{ _.nonEmpty }}
      .map { (_, 1) }
      .keyBy(0)
      .timeWindow(Time.seconds(5))
      .sum(1)
    counts.print()

    env.execute("Window Stream WordCount")
  }
} 
Example 21
Source File: TableKafkaJsonSQL.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.{StreamTableEnvironment, _}
import org.apache.flink.types.Row


  def output(): Unit = {
//    1001,zhangsan,100
//    1002,lisi,2000
//    1003,wangwu,1200
//    (true,100)
//    (false,100)
//    (true,2100)
//    (false,2100)
//    (true,3300)
  }
} 
Example 22
Source File: TableKafkaJsonConnector.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.{StreamTableEnvironment, _}
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}
import org.apache.flink.types.Row


  def output(): Unit = {
//    1001,zhangsan,100
//    1002,lisi,2000
//    1003,wangwu,1200
//    (true,100)
//    (false,100)
//    (true,2100)
//    (false,2100)
//    (true,3300)
  }
} 
Example 23
Source File: TableFlinkStreamingQuery.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.table

import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.scala._


    fsTableEnv.registerDataStream("users", words, 'id, 'name)

    // toAppendStream
    val selectAll = fsTableEnv.sqlQuery("SELECT name, id FROM users")
    selectAll.toAppendStream[(String, Int)].print()

    // toRetractStream
    val selectGroupByAll = fsTableEnv.sqlQuery("SELECT name, count(1) AS cnt FROM users GROUP BY name")
    selectGroupByAll.toRetractStream[(String, Long)].print()

    // execute
    fsEnv.execute("TableFlinkStreamingQuery")
  }
} 
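Example 23 is truncated: fsEnv, fsTableEnv, and the words stream are missing. A minimal sketch of the assumed setup follows; the planner settings and the sample data are assumptions.

// Hypothetical setup for the truncated TableFlinkStreamingQuery snippet.
val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
val fsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
val fsTableEnv = StreamTableEnvironment.create(fsEnv, fsSettings)

// sample (id, name) records; the real example presumably reads from a source
val words = fsEnv.fromElements((1, "flink"), (2, "spark"), (3, "flink"))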
Example 24
Source File: TransformationIterate.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{ConnectedStreams, StreamExecutionEnvironment}


object TransformationIterate {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._
    val dataStream = env.fromElements(3, 1, 2, 1, 5).map { t: Int => t }

    val iterated = dataStream.iterate((input: ConnectedStreams[Int, Int]) => {
      // define two map functions that handle the two inputs of the ConnectedStreams
      val head = input.map(i => i + 1, s => s)
      // the first stream (even values) is fed back, the second (odd values) is emitted
      (head.filter(_ % 2 == 0), head.filter(_ % 2 != 0))
    }, 1000)

    // sample output:
    // 5> 3
    iterated.print()
    env.execute()
  }
} 
Example 25
Source File: TransformationKeyByAndReduceAndAggregations.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala.{KeyedStream, StreamExecutionEnvironment}


object TransformationKeyByAndReduceAndAggregations {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    import org.apache.flink.api.scala._
    val dataStream = env.fromElements((1, 1), (1, 2), (2, 3), (2, 4))
    // use the first field of the tuple as the partition key
    val keyedStream: KeyedStream[(Int, Int), Tuple] = dataStream.keyBy(0)
    // rolling reduce: sum up the second field per key
    val reduceStream = keyedStream.reduce { (t1, t2) =>
      (t1._1, t1._2 + t2._2)
    }
    reduceStream.print("reduce") //reduce:8> (2,7) reduce:6> (1,3)

    // sum the second field per key
    val sumStream = keyedStream.sum(1)
    sumStream.print("sum") //sum:8> (2,7) sum:6> (1,3)

    // rolling minimum of the second field for each key
    val minStream = keyedStream.min(1)
    minStream.print("min")

    // rolling minimum for each key, returning the element that contains the minimum
    val minByStream = keyedStream.minBy(1)
    minByStream.print("minBy")

    env.execute()
  }
} 
Example 26
Source File: TransformationConnect.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala.{ConnectedStreams, DataStream, StreamExecutionEnvironment}


object TransformationConnect {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._
    val dataStream1:DataStream[(String,Int)] = env.fromElements(("a",1),("b",2),("c",3),("d",4),("e",5))
    val dataStream2:DataStream[Int] =env.fromElements(1,2,3,4,5)
    val connectedStream:ConnectedStreams[(String, Int), Int] = dataStream1.connect(dataStream2)

    val resultStream = connectedStream.map(new CoMapFunction[(String,Int),Int,(Int,String)] {
      override def map1(in1: (String, Int)): (Int, String) = {
        (in1._2,in1._1)
      }

      override def map2(in2: Int): (Int, String) = {
        (in2,"default")
      }
    })

    resultStream.print()

    env.execute()
  }
} 
Example 27
Source File: TransformationUnion.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}


object TransformationUnion {
  def main(args: Array[String]): Unit = {
      val env = StreamExecutionEnvironment.getExecutionEnvironment

    import org.apache.flink.api.scala._
    val dataStream1:DataStream[(String,Int)]=env.fromElements(("a",1),("b",2),("c",3),("d",4),("e",5))
    val dataStream2:DataStream[(String,Int)]=env.fromElements(("a",6),("b",7),("c",8),("d",9),("e",10))
    val dataStream3:DataStream[(String,Int)]=env.fromElements(("a",11),("b",12),("c",13),("d",14),("e",15))
    val unionStream = dataStream1.union(dataStream2)
    val unionStream2 = unionStream.union(dataStream3)
    unionStream2.print()
    env.execute()
  }
} 
Example 28
Source File: TransformationSplitAndSelect.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, SplitStream, StreamExecutionEnvironment}


object TransformationSplitAndSelect {
  def main(args: Array[String]): Unit = {
      val env  = StreamExecutionEnvironment.getExecutionEnvironment
      import org.apache.flink.api.scala._
      val dataStream1: DataStream[(String,Int)] = env.fromElements(("a",3),("d",4),("c",2),("c",5),("a",5))
      val splitStream:SplitStream[(String,Int)] = dataStream1
        .split(t => if (t._2 % 2 == 0) Seq("even") else Seq("old"))
      val evenStream : DataStream[(String,Int)] = splitStream.select("even")
      val oddStream : DataStream[(String,Int)] = splitStream.select("odd")
      evenStream.print()
      env.execute()

  }
} 
Example 29
Source File: TransformationFilter.scala    From flink-hairless-notes   with Apache License 2.0
package wang.yangting.tech.flink.streaming.scala.operators.transformation

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}


object TransformationFilter {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    import org.apache.flink.api.scala._
    val dataStream = env.fromElements(("a",1),("b",2),("a",3),("c",4))
    // keep only the tuples whose second element is even
    val filter:DataStream[(String, Int)]=dataStream.filter(_._2 % 2 ==0)
    filter.print()

    env.execute()
  }
} 
Example 30
Source File: DefaultSaverITCase.scala    From flink-tensorflow   with Apache License 2.0
package org.apache.flink.contrib.tensorflow.io

import org.apache.flink.contrib.tensorflow.models.savedmodel.DefaultSavedModelLoader
import org.apache.flink.contrib.tensorflow.util.{FlinkTestBase, RegistrationUtils}
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Matchers, WordSpecLike}
import org.tensorflow.{Session, Tensor}

import scala.collection.JavaConverters._

@RunWith(classOf[JUnitRunner])
class DefaultSaverITCase extends WordSpecLike
  with Matchers
  with FlinkTestBase {

  override val parallelism = 1

  "A DefaultSaver" should {
    "run the save op" in {
      val env = StreamExecutionEnvironment.getExecutionEnvironment
      RegistrationUtils.registerTypes(env.getConfig)

      val loader = new DefaultSavedModelLoader(new Path("../models/half_plus_two"), "serve")
      val bundle = loader.load()
      val saverDef = loader.metagraph.getSaverDef
      val saver = new DefaultSaver(saverDef)

      def getA = getVariable(bundle.session(), "a").floatValue()
      def setA(value: Float) = setVariable(bundle.session(), "a", Tensor.create(value))

      val initialA = getA
      println("Initial value: " + initialA)

      setA(1.0f)
      val savePath = tempFolder.newFolder("model-0").getAbsolutePath
      val path = saver.save(bundle.session(), savePath)
      val savedA = getA
      savedA shouldBe (1.0f)
      println("Saved value: " + getA)

      setA(2.0f)
      val updatedA = getA
      updatedA shouldBe (2.0f)
      println("Updated value: " + updatedA)

      saver.restore(bundle.session(), path)
      val restoredA = getA
      restoredA shouldBe (savedA)
      println("Restored value: " + restoredA)
    }

    def getVariable(sess: Session, name: String): Tensor = {
      val result = sess.runner().fetch(name).run().asScala
      result.head
    }

    def setVariable(sess: Session, name: String, value: Tensor): Unit = {
      sess.runner()
        .addTarget(s"$name/Assign")
        .feed(s"$name/initial_value", value)
        .run()
    }
  }
}