java.util.StringTokenizer Scala Examples

The following examples show how to use java.util.StringTokenizer. They are collected from open-source projects; the originating project and source file are noted above each example.
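As a quick refresher before the examples: StringTokenizer splits a string on a set of single-character delimiters (whitespace by default) and can optionally return the delimiters themselves as tokens, which is what several of the parsers below rely on. A minimal sketch:

import java.util.StringTokenizer

val st = new StringTokenizer("(1.0,[2.0,3.0])", "()[],", true)
while (st.hasMoreTokens) {
  print(st.nextToken() + " ")   // prints: ( 1.0 , [ 2.0 , 3.0 ] )
}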
Example 1
Source File: NumericParser.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
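A minimal usage sketch for the parser above (the input string is hypothetical; NumericParser is private[mllib], so the call site is assumed to live inside the org.apache.spark.mllib.util package):

val parsed = NumericParser.parse("(1.0,[2.0,3.0])")
parsed match {
  case Seq(label: Double, values: Array[Double]) =>
    println(s"label=$label, values=${values.mkString(",")}")   // label=1.0, values=2.0,3.0
  case other =>
    println(s"unexpected structure: $other")
}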
Example 2
Source File: HmLabeledPoint.scala    From hivemall-spark   with Apache License 2.0
package org.apache.spark.ml.feature

import java.util.StringTokenizer

import scala.collection.mutable.ListBuffer

import hivemall.HivemallException

// Used for DataFrame#explode
case class HmFeature(feature: String)

// Represents the label and the string-encoded features of a data point for Hivemall
case class HmLabeledPoint(label: Float = 0.0f, features: Seq[String]) {
  override def toString: String = {
    "%s,%s".format(label, features.mkString("[", ",", "]"))
  }
}

object HmLabeledPoint {

  // Simple parser for HivemallLabeledPoint
  def parse(s: String) = {
    val (label, features) = s.indexOf(',') match {
      case d if d > 0 => (s.substring(0, d), s.substring(d + 1))
      case _ => ("0.0", "[]") // Dummy
    }
    HmLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true)))
  }

  // TODO: Support to parse rows without labels
  private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = {
    val items = ListBuffer.empty[String]
    var parsing = true
    var allowDelim = false
    while (parsing && tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "[") {
        items ++= parseTuple(tokenizer)
        parsing = false
        allowDelim = true
      } else if (token == ",") {
        if (allowDelim) {
          allowDelim = false
        } else {
          throw new HivemallException("Found ',' at a wrong position.")
        }
      } else if (token == "]") {
        parsing = false
      } else {
        items.append(token)
        allowDelim = true
      }
    }
    if (parsing) {
      throw new HivemallException(s"A tuple must end with ']'.")
    }
    items
  }
} 
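A minimal usage sketch (the "name:value" feature strings are hypothetical, following the format Hivemall commonly uses):

val point = HmLabeledPoint.parse("1.0,[height:160.1,weight:55.2]")
println(point.label)      // 1.0
println(point.features)   // ListBuffer(height:160.1, weight:55.2)
println(point)            // 1.0,[height:160.1,weight:55.2]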
Example 3
Source File: NumericParser.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
Example 4
Source File: RPNParser.scala    From Scala-Design-Patterns-Second-Edition   with MIT License
package com.ivan.nikolov.behavioral.interpreter

import java.util.StringTokenizer

import scala.collection.JavaConverters._
import scala.collection.mutable

class RPNParser {

  def parse(expression: String): Expression = {
    val tokenizer = new StringTokenizer(expression)
    tokenizer.asScala.foldLeft(mutable.Stack[Expression]()) {
      case (result, token) =>
        val item = Expression(token.toString, result.pop(), result.pop())
        item.foreach(result.push)
        result
    }.pop()
  }
}

class RPNInterpreter {
  def interpret(expression: Expression): Int = expression.interpret()
}

object RPNExample {
  def main(args: Array[String]): Unit = {
    val expr1 = "1 2 + 3 * 9 10 + -" // (1 + 2) * 3 - (9 + 10) = -10
    val expr2 = "1 2 3 4 5 * * - +" // 1 + 2 - 3 * 4 * 5 = -57
    val expr3 = "12 -" // invalid
    val parser = new RPNParser
    val interpreter = new RPNInterpreter
    
    System.out.println(s"The result of '${expr1}' is: ${interpreter.interpret(parser.parse(expr1))}")
    System.out.println(s"The result of '${expr2}' is: ${interpreter.interpret(parser.parse(expr2))}")
    try {
      System.out.println(s"The result is: ${interpreter.interpret(parser.parse(expr3))}")
    } catch {
      case _: Throwable => System.out.println(s"'$expr3' is invalid.")
    }
  }
} 
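The parser above relies on an Expression hierarchy defined elsewhere in the same project and not shown here. A minimal sketch of what it could look like, assuming the case-class names and the by-name apply signature (the by-name parameters matter: for a plain number token the two stack pops passed in by RPNParser.parse are never evaluated):

import scala.util.Try

trait Expression {
  def interpret(): Int
}

case class Number(n: Int) extends Expression {
  override def interpret(): Int = n
}
case class Add(left: Expression, right: Expression) extends Expression {
  override def interpret(): Int = left.interpret() + right.interpret()
}
case class Subtract(left: Expression, right: Expression) extends Expression {
  override def interpret(): Int = left.interpret() - right.interpret()
}
case class Multiply(left: Expression, right: Expression) extends Expression {
  override def interpret(): Int = left.interpret() * right.interpret()
}

object Expression {
  def apply(token: String, first: => Expression, second: => Expression): Option[Expression] =
    token match {
      case "+" | "-" | "*" =>
        // In RPN the first value popped (the top of the stack) is the right operand.
        val right = first
        val left = second
        token match {
          case "+" => Some(Add(left, right))
          case "-" => Some(Subtract(left, right))
          case "*" => Some(Multiply(left, right))
        }
      case number => Try(Number(number.toInt)).toOption
    }
}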
Example 5
package com.ivan.nikolov.duck

import java.util.StringTokenizer

class SentenceParserTokenize {
  
  def parse(sentence: String): Array[String] = {
    val tokenizer = new StringTokenizer(sentence)
    Iterator.continually({
      val hasMore = tokenizer.hasMoreTokens
      if (hasMore) {
        (hasMore, tokenizer.nextToken())
      } else {
        (hasMore, null)
      }
    }).takeWhile(_._1).map(_._2).toArray
  }
    
} 
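A minimal usage sketch:

val parser = new SentenceParserTokenize
val words = parser.parse("the quick brown fox")
println(words.mkString("|"))   // the|quick|brown|fox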
Example 6
Source File: NumericParser.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
Example 7
Source File: NumericParser.scala    From iolap   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
Example 8
Source File: NumericParser.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
Example 9
Source File: NumericParser.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
Example 10
Source File: NumericParser.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.mllib.util

import java.util.StringTokenizer

import scala.collection.mutable.{ArrayBuilder, ListBuffer}

import org.apache.spark.SparkException

// Simple parser for a numeric structure: a double, an array such as [1.0,2.0],
// or a (possibly nested) tuple such as (1.0,[2.0,3.0]).
private[mllib] object NumericParser {
  def parse(s: String): Any = {
    val tokenizer = new StringTokenizer(s, "()[],", true)
    if (tokenizer.hasMoreTokens()) {
      val token = tokenizer.nextToken()
      if (token == "(") {
        parseTuple(tokenizer)
      } else if (token == "[") {
        parseArray(tokenizer)
      } else {
        // expecting a number
        parseDouble(token)
      }
    } else {
      throw new SparkException(s"Cannot find any token from the input string.")
    }
  }

  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
    val values = ArrayBuilder.make[Double]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "]") {
        parsing = false
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else {
        // expecting a number
        values += parseDouble(token)
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"An array must end with ']'.")
    }
    values.result()
  }

  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
    val items = ListBuffer.empty[Any]
    var parsing = true
    var allowComma = false
    var token: String = null
    while (parsing && tokenizer.hasMoreTokens()) {
      token = tokenizer.nextToken()
      if (token == "(") {
        items.append(parseTuple(tokenizer))
        allowComma = true
      } else if (token == "[") {
        items.append(parseArray(tokenizer))
        allowComma = true
      } else if (token == ",") {
        if (allowComma) {
          allowComma = false
        } else {
          throw new SparkException("Found a ',' at a wrong position.")
        }
      } else if (token == ")") {
        parsing = false
      } else if (token.trim.isEmpty) {
          // ignore whitespaces between delim chars, e.g. ", ["
      } else {
        // expecting a number
        items.append(parseDouble(token))
        allowComma = true
      }
    }
    if (parsing) {
      throw new SparkException(s"A tuple must end with ')'.")
    }
    items
  }

  private def parseDouble(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new SparkException(s"Cannot parse a double from: $s", e)
    }
  }
} 
Example 11
Source File: SQLServerUtils.scala    From spark-sql-server   with Apache License 2.0
package org.apache.spark.sql.server.util

import java.io.File
import java.lang.reflect.Field
import java.util.StringTokenizer

import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.server.SQLServerConf._
import org.apache.spark.sql.server.SQLServerEnv
import org.apache.spark.util.Utils

object SQLServerUtils {

  def isTesting: Boolean = {
    SQLServerEnv.sparkConf.contains("spark.sql.server.testing") &&
      SQLServerEnv.sparkConf.get("spark.sql.server.testing") == "true"
  }

  def isRunningOnYarn(conf: SQLConf): Boolean = {
    conf.settings.get("spark.master").startsWith("yarn")
  }

  def isKerberosEnabled(conf: SQLConf): Boolean = {
    require(!conf.sqlServerImpersonationEnabled || conf.sqlServerExecutionMode == "multi-context",
      "Impersonation can be enabled in multi-context mode only")
    conf.contains("spark.yarn.keytab") && conf.contains("spark.yarn.principal")
  }

  def kerberosKeytab(conf: SQLConf): String = {
    val key = "spark.yarn.keytab"
    val keytabFilename = conf.getConfString(key)
    require(keytabFilename != null, s"Kerberos requires `$key` to be provided.")
    keytabFilename
  }

  def kerberosPrincipal(conf: SQLConf): String = {
    val key = "spark.yarn.principal"
    val principalName = conf.getConfString(key)
    require(principalName != null, s"Kerberos requires `$key` to be provided.")
    principalName
  }

  def findFileOnClassPath(fileName: String): Option[File] = {
    val classpath = System.getProperty("java.class.path")
    val pathSeparator = System.getProperty("path.separator")
    val tokenizer = new StringTokenizer(classpath, pathSeparator)
    while (tokenizer.hasMoreTokens) {
      val pathElement = tokenizer.nextToken()
      val directoryOrJar = new File(pathElement)
      val absoluteDirectoryOrJar = directoryOrJar.getAbsoluteFile
      if (absoluteDirectoryOrJar.isFile) {
        val target = new File(absoluteDirectoryOrJar.getParent, fileName)
        if (target.exists()) {
          return Some(target)
        }
      } else {
        val target = new File(directoryOrJar, fileName)
        if (target.exists()) {
          return Some(target)
        }
      }
    }
    None
  }

  // https://blog.sebastian-daschner.com/entries/changing_env_java
  def injectEnvVar(key: String, value: String): Unit = {
    val clazz = Utils.classForName("java.lang.ProcessEnvironment")
    injectIntoUnmodifiableMap(key, value, clazz)
  }

  private def getDeclaredField(clazz: Class[_], fieldName: String): Field = {
    val field = clazz.getDeclaredField(fieldName)
    field.setAccessible(true)
    field
  }

  private def injectIntoUnmodifiableMap(key: String, value: String, clazz: Class[_]): Unit = {
    val unmodifiableEnvField = getDeclaredField(clazz, "theUnmodifiableEnvironment")
    val unmodifiableEnv = unmodifiableEnvField.get(null)
    val unmodifiableMapClazz = Utils.classForName("java.util.Collections$UnmodifiableMap")
    val field = getDeclaredField(unmodifiableMapClazz, "m")
    field.get(unmodifiableEnv).asInstanceOf[java.util.Map[String, String]].put(key, value)
  }
}
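A minimal usage sketch for the class-path lookup above (the file name is hypothetical):

SQLServerUtils.findFileOnClassPath("log4j.properties") match {
  case Some(file) => println(s"found: ${file.getAbsolutePath}")
  case None       => println("not found on the class path")
}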