org.apache.spark.mllib.regression.GeneralizedLinearModel Scala Examples

The following examples show how to use org.apache.spark.mllib.regression.GeneralizedLinearModel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: GeneralizedLinearPMMLModelExport.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(schema)
        .setModelName(description)
        .addRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(regression)
    }
  }
} 
Example 2
Source File: BinaryClassificationPMMLModelExport.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).setTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).setTargetCategory("0")

      val classifier = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(schema)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(classifier)
    }
  }
} 
Example 3
Source File: GeneralizedLinearPMMLModelExport.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(schema)
        .setModelName(description)
        .addRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(regression)
    }
  }
} 
Example 4
Source File: BinaryClassificationPMMLModelExport.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).setTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).setTargetCategory("0")

      val classifier = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(schema)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(classifier)
    }
  }
} 
Example 5
Source File: GeneralizedLinearPMMLModelExport.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(schema)
        .setModelName(description)
        .addRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(regression)
    }
  }
} 
Example 6
Source File: BinaryClassificationPMMLModelExport.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).setTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).setTargetCategory("0")

      val classifier = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(schema)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(classifier)
    }
  }
} 
Example 7
Source File: GeneralizedLinearPMMLModelExport.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .withFunctionName(MiningFunctionType.REGRESSION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.withDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).withUsageType(usage)
        schema.withMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(regression)
    }
  }
} 
Example 8
Source File: BinaryClassificationPMMLModelExport.scala    From iolap   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).withTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).withTargetCategory("0")

      val classifier = new RegressionModel()
        .withFunctionName(MiningFunctionType.CLASSIFICATION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withNormalizationMethod(normalizationMethod)
        .withRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.withDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).withUsageType(usage)
        schema.withMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(classifier)
    }
  }
} 
Example 9
Source File: GeneralizedLinearPMMLModelExport.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .withFunctionName(MiningFunctionType.REGRESSION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.withDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).withUsageType(usage)
        schema.withMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(regression)
    }
  }
} 
Example 10
Source File: BinaryClassificationPMMLModelExport.scala    From spark1.52   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).withTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).withTargetCategory("0")

      val classifier = new RegressionModel()
        .withFunctionName(MiningFunctionType.CLASSIFICATION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withNormalizationMethod(normalizationMethod)
        .withRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.withDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).withUsageType(usage)
        schema.withMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(classifier)
    }
  }
} 
Example 11
Source File: GeneralizedLinearPMMLModelExport.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(schema)
        .setModelName(description)
        .addRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(regression)
    }
  }
} 
Example 12
Source File: BinaryClassificationPMMLModelExport.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).setTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).setTargetCategory("0")

      val classifier = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(schema)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.addDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).setUsageType(usage)
        schema.addMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(classifier)
    }
  }
} 
Example 13
Source File: GeneralizedLinearPMMLModelExport.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes `model` as a PMML regression model and attaches it to `pmml`.
   *
   * The header description is always written. Everything else happens only when `model` has at
   * least one weight: weight `i` becomes the coefficient of an active field named `field_i`, the
   * intercept is handed to the regression table, and a field named `target` is declared as the
   * target. All declared fields are continuous doubles.
   *
   * `pmml` and `description` are resolved from the enclosing scope, which is outside this block.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .withFunctionName(MiningFunctionType.REGRESSION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withRegressionTables(table)

      // Declares a continuous double field in the dictionary, then registers it in the schema
      // with the requested usage.
      def declare(name: FieldName, usage: FieldUsageType): Unit = {
        dictionary.withDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        val miningField = new MiningField(name).withUsageType(usage)
        schema.withMiningFields(miningField)
      }

      // One active input field and one numeric predictor per model weight.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, FieldUsageType.ACTIVE)
        table.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target field is declared for completeness.
      declare(FieldName.create("target"), FieldUsageType.TARGET)

      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(regression)
    }
  }
} 
Example 14
Source File: BinaryClassificationPMMLModelExport.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel


  /**
   * Describes the binary classifier as a two-table PMML regression model and attaches it to
   * `pmml`.
   *
   * The header description is always written. Everything else happens only when the model has
   * at least one weight. The table for category "1" carries the model intercept and one numeric
   * predictor per weight (on active continuous double fields named `field_i`); the table for
   * category "0" carries only an intercept derived from `threshold`. A categorical string field
   * named `target` is declared as the target.
   *
   * `pmml`, `description`, `model`, `threshold` and `normalizationMethod` are resolved from the
   * enclosing scope, which is outside this block.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numFeatures = model.weights.size
    if (numFeatures > 0) {
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).withTargetCategory("1")

      // Under LOGIT normalization the threshold is mapped through
      // -log(1 / t - 1) = log(t / (1 - t)); thresholds at or outside the open interval (0, 1)
      // are clamped to the extreme finite doubles. Otherwise the raw threshold is used.
      val interceptNo =
        if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
          if (threshold <= 0) Double.MinValue
          else if (threshold >= 1) Double.MaxValue
          else -math.log(1 / threshold - 1)
        } else {
          threshold
        }
      val tableNo = new RegressionTable(interceptNo).withTargetCategory("0")

      val classifier = new RegressionModel()
        .withFunctionName(MiningFunctionType.CLASSIFICATION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withNormalizationMethod(normalizationMethod)
        .withRegressionTables(tableYes, tableNo)

      // Declares a field in the dictionary, then registers it in the schema with the
      // requested usage.
      def declare(name: FieldName, opType: OpType, dataType: DataType,
          usage: FieldUsageType): Unit = {
        dictionary.withDataFields(new DataField(name, opType, dataType))
        val miningField = new MiningField(name).withUsageType(usage)
        schema.withMiningFields(miningField)
      }

      // One active input field per weight; predictors go on the "1" table only.
      (0 until numFeatures).foreach { idx =>
        val feature = FieldName.create("field_" + idx)
        declare(feature, OpType.CONTINUOUS, DataType.DOUBLE, FieldUsageType.ACTIVE)
        tableYes.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Target field holding the predicted category.
      declare(FieldName.create("target"), OpType.CATEGORICAL, DataType.STRING,
        FieldUsageType.TARGET)

      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(classifier)
    }
  }
} 
Example 15
Source File: SparkPredictorEngine.scala    From elasticsearch-prediction-spark   with Apache License 2.0 5 votes vote down vote up
package com.sdhu.elasticsearchprediction.spark

import com.mahisoft.elasticsearchprediction.plugin.engine.PredictorEngine
import com.mahisoft.elasticsearchprediction.plugin.domain.IndexValue
import com.mahisoft.elasticsearchprediction.plugin.exception.PredictionException

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.GeneralizedLinearModel

import java.util.Collection

/**
 * [[PredictorEngine]] that answers prediction requests with a Spark linear model.
 *
 * The engine starts with a default `ModelData[M]()`; call [[readModel]] to replace it with the
 * model that `spHelp` reads from `readPath`.
 *
 * @param readPath location handed to `spHelp.readSparkModel`
 * @param spHelp   helper used to read the model
 * @tparam M concrete model type
 */
class SparkPredictorEngine[M <: GeneralizedLinearModel](
    val readPath: String,
    val spHelp: SparkModelHelpers[M]) extends PredictorEngine {

  // Most recently read model data; replaced by readModel().
  private var currentModel: ModelData[M] = ModelData[M]()

  /**
   * Converts `values` into a feature vector and returns the classifier's prediction for it.
   *
   * When the model data has no categories map, an empty map is used for the conversion.
   *
   * @param values index values to predict on
   * @return the prediction produced by the current classifier
   * @throws PredictionException if the current model data holds no classifier
   */
  override def getPrediction(values: Collection[IndexValue]): Double =
    // NOTE(review): assumes `clf` is an Option, as its nonEmpty/get usage suggests — confirm.
    currentModel.clf match {
      case Some(classifier) =>
        val categories = currentModel.categoriesMap.getOrElse(Map[String, Double]())
        val features = ReadUtil.cIndVal2Vector(values, categories)
        classifier.predict(features)
      case None =>
        throw new PredictionException("Empty model")
    }

  /**
   * Reads the model from `readPath` via `spHelp`, stores it as the current model and returns it.
   */
  def readModel(): ModelData[M] = {
    val fresh = spHelp.readSparkModel(readPath)
    currentModel = fresh
    currentModel
  }

  /** Returns the current model data without re-reading it. */
  def getModel: ModelData[M] = currentModel
}