org.apache.spark.mllib.regression.GeneralizedLinearModel Scala Examples
The following examples show how to use org.apache.spark.mllib.regression.GeneralizedLinearModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: GeneralizedLinearPMMLModelExport.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Fills the surrounding `pmml` document with a regression model built from `model`.
   *
   * The header description is always set. When the model has at least one weight, weight
   * index `i` becomes a continuous double field named `field_i` that is registered in the
   * data dictionary, marked ACTIVE in the mining schema, and added to the regression table
   * as a numeric predictor carrying that weight. A continuous double `target` field is then
   * registered with TARGET usage. A model without weights changes nothing beyond the header.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val featureCount = model.weights.size
    if (featureCount > 0) {
      // One PMML field name per weight, in weight order.
      val featureFields = SArray.tabulate(featureCount)(idx => FieldName.create("field_" + idx))
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val exported = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(schema)
        .setModelName(description)
        .addRegressionTables(table)

      featureFields.zipWithIndex.foreach { case (feature, idx) =>
        dictionary.addDataFields(new DataField(feature, OpType.CONTINUOUS, DataType.DOUBLE))
        schema.addMiningFields(new MiningField(feature).setUsageType(FieldUsageType.ACTIVE))
        table.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target is declared for completeness; no predictor refers to it.
      val target = FieldName.create("target")
      dictionary.addDataFields(new DataField(target, OpType.CONTINUOUS, DataType.DOUBLE))
      schema.addMiningFields(new MiningField(target).setUsageType(FieldUsageType.TARGET))
      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(exported)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 2
Source File: BinaryClassificationPMMLModelExport.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Fills the surrounding `pmml` document with a two-category classification model.
   *
   * The header description is always set. When the model has at least one weight, two
   * regression tables are produced: category "1" carries the model intercept and one numeric
   * predictor per weight (fields `field_0`, `field_1`, ...), while category "0" carries only
   * an intercept derived from `threshold`. A categorical string `target` field is registered
   * with TARGET usage. A model without weights changes nothing beyond the header.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val featureCount = model.weights.size
    if (featureCount > 0) {
      // One PMML field name per weight, in weight order.
      val featureFields = SArray.tabulate(featureCount)(idx => FieldName.create("field_" + idx))
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val positiveTable = new RegressionTable(model.intercept).setTargetCategory("1")

      // With LOGIT normalization the threshold is mapped through the inverse logistic
      // function, clamped to the extreme finite doubles outside the open interval (0, 1);
      // with any other normalization the threshold is used unchanged.
      val usesLogit = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val negativeIntercept =
        if (!usesLogit) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val negativeTable = new RegressionTable(negativeIntercept).setTargetCategory("0")

      val exported = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(schema)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(positiveTable, negativeTable)

      featureFields.zipWithIndex.foreach { case (feature, idx) =>
        dictionary.addDataFields(new DataField(feature, OpType.CONTINUOUS, DataType.DOUBLE))
        schema.addMiningFields(new MiningField(feature).setUsageType(FieldUsageType.ACTIVE))
        positiveTable.addNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Declare the categorical target field.
      val target = FieldName.create("target")
      dictionary.addDataFields(new DataField(target, OpType.CATEGORICAL, DataType.STRING))
      schema.addMiningFields(new MiningField(target).setUsageType(FieldUsageType.TARGET))
      dictionary.setNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.addModels(exported)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 3
Source File: GeneralizedLinearPMMLModelExport.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Populates the surrounding `pmml` document with a regression model derived from `model`.
   *
   * The header description is set unconditionally. If the model has any weights, each weight
   * index `i` yields a continuous double field `field_i` that is added to the data
   * dictionary, flagged ACTIVE in the mining schema, and attached to the regression table as
   * a numeric predictor with that weight. A continuous double `target` field with TARGET
   * usage follows. With no weights, only the header is touched.
   *
   * @param model linear model supplying the intercept and the weights
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val weightCount = model.weights.size
    if (weightCount > 0) {
      // Field names are created up front, one per weight position.
      val inputFields = SArray.tabulate(weightCount)(pos => FieldName.create("field_" + pos))
      val dataDict = new DataDictionary
      val mining = new MiningSchema
      val coefficients = new RegressionTable(model.intercept)
      val pmmlModel = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(mining)
        .setModelName(description)
        .addRegressionTables(coefficients)

      inputFields.zipWithIndex.foreach { case (input, pos) =>
        dataDict.addDataFields(new DataField(input, OpType.CONTINUOUS, DataType.DOUBLE))
        mining.addMiningFields(new MiningField(input).setUsageType(FieldUsageType.ACTIVE))
        coefficients.addNumericPredictors(new NumericPredictor(input, model.weights(pos)))
      }

      // Target field is listed for completeness only.
      val output = FieldName.create("target")
      dataDict.addDataFields(new DataField(output, OpType.CONTINUOUS, DataType.DOUBLE))
      mining.addMiningFields(new MiningField(output).setUsageType(FieldUsageType.TARGET))
      dataDict.setNumberOfFields(dataDict.getDataFields.size)

      pmml.setDataDictionary(dataDict)
      pmml.addModels(pmmlModel)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 4
Source File: BinaryClassificationPMMLModelExport.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Populates the surrounding `pmml` document with a binary classification model.
   *
   * The header description is set unconditionally. If the model has any weights, a table for
   * category "1" receives the model intercept plus one numeric predictor per weight (fields
   * `field_0`, `field_1`, ...), and a table for category "0" receives only an intercept
   * computed from `threshold`. A categorical string `target` field with TARGET usage is also
   * declared. With no weights, only the header is touched.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val weightCount = model.weights.size
    if (weightCount > 0) {
      // Field names are created up front, one per weight position.
      val inputFields = SArray.tabulate(weightCount)(pos => FieldName.create("field_" + pos))
      val dataDict = new DataDictionary
      val mining = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).setTargetCategory("1")

      // Under LOGIT normalization the threshold goes through the inverse logistic function,
      // saturating at the extreme finite doubles when it lies outside (0, 1); otherwise the
      // raw threshold becomes the intercept.
      val logitNormalized = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val interceptForNo =
        if (!logitNormalized) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val tableNo = new RegressionTable(interceptForNo).setTargetCategory("0")

      val pmmlModel = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(mining)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(tableYes, tableNo)

      inputFields.zipWithIndex.foreach { case (input, pos) =>
        dataDict.addDataFields(new DataField(input, OpType.CONTINUOUS, DataType.DOUBLE))
        mining.addMiningFields(new MiningField(input).setUsageType(FieldUsageType.ACTIVE))
        tableYes.addNumericPredictors(new NumericPredictor(input, model.weights(pos)))
      }

      // Categorical target field.
      val output = FieldName.create("target")
      dataDict.addDataFields(new DataField(output, OpType.CATEGORICAL, DataType.STRING))
      mining.addMiningFields(new MiningField(output).setUsageType(FieldUsageType.TARGET))
      dataDict.setNumberOfFields(dataDict.getDataFields.size)

      pmml.setDataDictionary(dataDict)
      pmml.addModels(pmmlModel)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 5
Source File: GeneralizedLinearPMMLModelExport.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Writes a regression model for `model` into the surrounding `pmml` document.
   *
   * The header description is always assigned. For a model with one or more weights, weight
   * `i` is exposed as a continuous double field `field_i`: the field is added to the data
   * dictionary, given ACTIVE usage in the mining schema, and used as a numeric predictor
   * with that weight. A continuous double `target` field with TARGET usage is added last.
   * For a model with zero weights nothing else is written.
   *
   * @param model linear model providing the intercept and the weights
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numWeights = model.weights.size
    if (numWeights > 0) {
      // Names for the predictor fields, indexed like the weights.
      val predictorNames = SArray.tabulate(numWeights)(k => FieldName.create("field_" + k))
      val fieldCatalog = new DataDictionary
      val usageSchema = new MiningSchema
      val weightTable = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(usageSchema)
        .setModelName(description)
        .addRegressionTables(weightTable)

      predictorNames.zipWithIndex.foreach { case (name, k) =>
        fieldCatalog.addDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        usageSchema.addMiningFields(new MiningField(name).setUsageType(FieldUsageType.ACTIVE))
        weightTable.addNumericPredictors(new NumericPredictor(name, model.weights(k)))
      }

      // Added for completeness: the field the model predicts.
      val targetName = FieldName.create("target")
      fieldCatalog.addDataFields(new DataField(targetName, OpType.CONTINUOUS, DataType.DOUBLE))
      usageSchema.addMiningFields(
        new MiningField(targetName).setUsageType(FieldUsageType.TARGET))
      fieldCatalog.setNumberOfFields(fieldCatalog.getDataFields.size)

      pmml.setDataDictionary(fieldCatalog)
      pmml.addModels(regression)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 6
Source File: BinaryClassificationPMMLModelExport.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Writes a binary classification model into the surrounding `pmml` document.
   *
   * The header description is always assigned. For a model with one or more weights, the
   * table for category "1" holds the model intercept and a numeric predictor per weight
   * (fields `field_0`, `field_1`, ...); the table for category "0" holds just an intercept
   * obtained from `threshold`. A categorical string `target` field with TARGET usage is
   * added as well. For a model with zero weights nothing else is written.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numWeights = model.weights.size
    if (numWeights > 0) {
      // Names for the predictor fields, indexed like the weights.
      val predictorNames = SArray.tabulate(numWeights)(k => FieldName.create("field_" + k))
      val fieldCatalog = new DataDictionary
      val usageSchema = new MiningSchema
      val yesTable = new RegressionTable(model.intercept).setTargetCategory("1")

      // LOGIT normalization: convert the threshold with the inverse logistic function and
      // clamp to the extreme finite doubles outside (0, 1). Other normalizations keep the
      // threshold as is.
      val logit = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val noIntercept =
        if (!logit) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val noTable = new RegressionTable(noIntercept).setTargetCategory("0")

      val classifier = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(usageSchema)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(yesTable, noTable)

      predictorNames.zipWithIndex.foreach { case (name, k) =>
        fieldCatalog.addDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        usageSchema.addMiningFields(new MiningField(name).setUsageType(FieldUsageType.ACTIVE))
        yesTable.addNumericPredictors(new NumericPredictor(name, model.weights(k)))
      }

      // The predicted field is categorical.
      val targetName = FieldName.create("target")
      fieldCatalog.addDataFields(new DataField(targetName, OpType.CATEGORICAL, DataType.STRING))
      usageSchema.addMiningFields(
        new MiningField(targetName).setUsageType(FieldUsageType.TARGET))
      fieldCatalog.setNumberOfFields(fieldCatalog.getDataFields.size)

      pmml.setDataDictionary(fieldCatalog)
      pmml.addModels(classifier)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 7
Source File: GeneralizedLinearPMMLModelExport.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Fills the surrounding `pmml` document with a regression model built from `model`.
   *
   * The header description is always set. When the model has at least one weight, weight
   * index `i` becomes a continuous double field named `field_i` that is registered in the
   * data dictionary, marked ACTIVE in the mining schema, and added to the regression table
   * as a numeric predictor carrying that weight. A continuous double `target` field is then
   * registered with TARGET usage. A model without weights changes nothing beyond the header.
   *
   * @param model linear model whose intercept and weights are exported
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val featureCount = model.weights.size
    if (featureCount > 0) {
      // One PMML field name per weight, in weight order.
      val featureFields = SArray.tabulate(featureCount)(idx => FieldName.create("field_" + idx))
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val table = new RegressionTable(model.intercept)
      val exported = new RegressionModel()
        .withFunctionName(MiningFunctionType.REGRESSION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withRegressionTables(table)

      featureFields.zipWithIndex.foreach { case (feature, idx) =>
        dictionary.withDataFields(new DataField(feature, OpType.CONTINUOUS, DataType.DOUBLE))
        schema.withMiningFields(new MiningField(feature).withUsageType(FieldUsageType.ACTIVE))
        table.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // The target is declared for completeness; no predictor refers to it.
      val target = FieldName.create("target")
      dictionary.withDataFields(new DataField(target, OpType.CONTINUOUS, DataType.DOUBLE))
      schema.withMiningFields(new MiningField(target).withUsageType(FieldUsageType.TARGET))
      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(exported)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 8
Source File: BinaryClassificationPMMLModelExport.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Fills the surrounding `pmml` document with a two-category classification model.
   *
   * The header description is always set. When the model has at least one weight, two
   * regression tables are produced: category "1" carries the model intercept and one numeric
   * predictor per weight (fields `field_0`, `field_1`, ...), while category "0" carries only
   * an intercept derived from `threshold`. A categorical string `target` field is registered
   * with TARGET usage. A model without weights changes nothing beyond the header.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val featureCount = model.weights.size
    if (featureCount > 0) {
      // One PMML field name per weight, in weight order.
      val featureFields = SArray.tabulate(featureCount)(idx => FieldName.create("field_" + idx))
      val dictionary = new DataDictionary
      val schema = new MiningSchema
      val positiveTable = new RegressionTable(model.intercept).withTargetCategory("1")

      // With LOGIT normalization the threshold is mapped through the inverse logistic
      // function, clamped to the extreme finite doubles outside the open interval (0, 1);
      // with any other normalization the threshold is used unchanged.
      val usesLogit = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val negativeIntercept =
        if (!usesLogit) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val negativeTable = new RegressionTable(negativeIntercept).withTargetCategory("0")

      val exported = new RegressionModel()
        .withFunctionName(MiningFunctionType.CLASSIFICATION)
        .withMiningSchema(schema)
        .withModelName(description)
        .withNormalizationMethod(normalizationMethod)
        .withRegressionTables(positiveTable, negativeTable)

      featureFields.zipWithIndex.foreach { case (feature, idx) =>
        dictionary.withDataFields(new DataField(feature, OpType.CONTINUOUS, DataType.DOUBLE))
        schema.withMiningFields(new MiningField(feature).withUsageType(FieldUsageType.ACTIVE))
        positiveTable.withNumericPredictors(new NumericPredictor(feature, model.weights(idx)))
      }

      // Declare the categorical target field.
      val target = FieldName.create("target")
      dictionary.withDataFields(new DataField(target, OpType.CATEGORICAL, DataType.STRING))
      schema.withMiningFields(new MiningField(target).withUsageType(FieldUsageType.TARGET))
      dictionary.withNumberOfFields(dictionary.getDataFields.size)

      pmml.setDataDictionary(dictionary)
      pmml.withModels(exported)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 9
Source File: GeneralizedLinearPMMLModelExport.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Populates the surrounding `pmml` document with a regression model derived from `model`.
   *
   * The header description is set unconditionally. If the model has any weights, each weight
   * index `i` yields a continuous double field `field_i` that is added to the data
   * dictionary, flagged ACTIVE in the mining schema, and attached to the regression table as
   * a numeric predictor with that weight. A continuous double `target` field with TARGET
   * usage follows. With no weights, only the header is touched.
   *
   * @param model linear model supplying the intercept and the weights
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val weightCount = model.weights.size
    if (weightCount > 0) {
      // Field names are created up front, one per weight position.
      val inputFields = SArray.tabulate(weightCount)(pos => FieldName.create("field_" + pos))
      val dataDict = new DataDictionary
      val mining = new MiningSchema
      val coefficients = new RegressionTable(model.intercept)
      val pmmlModel = new RegressionModel()
        .withFunctionName(MiningFunctionType.REGRESSION)
        .withMiningSchema(mining)
        .withModelName(description)
        .withRegressionTables(coefficients)

      inputFields.zipWithIndex.foreach { case (input, pos) =>
        dataDict.withDataFields(new DataField(input, OpType.CONTINUOUS, DataType.DOUBLE))
        mining.withMiningFields(new MiningField(input).withUsageType(FieldUsageType.ACTIVE))
        coefficients.withNumericPredictors(new NumericPredictor(input, model.weights(pos)))
      }

      // Target field is listed for completeness only.
      val output = FieldName.create("target")
      dataDict.withDataFields(new DataField(output, OpType.CONTINUOUS, DataType.DOUBLE))
      mining.withMiningFields(new MiningField(output).withUsageType(FieldUsageType.TARGET))
      dataDict.withNumberOfFields(dataDict.getDataFields.size)

      pmml.setDataDictionary(dataDict)
      pmml.withModels(pmmlModel)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 10
Source File: BinaryClassificationPMMLModelExport.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Populates the surrounding `pmml` document with a binary classification model.
   *
   * The header description is set unconditionally. If the model has any weights, a table for
   * category "1" receives the model intercept plus one numeric predictor per weight (fields
   * `field_0`, `field_1`, ...), and a table for category "0" receives only an intercept
   * computed from `threshold`. A categorical string `target` field with TARGET usage is also
   * declared. With no weights, only the header is touched.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val weightCount = model.weights.size
    if (weightCount > 0) {
      // Field names are created up front, one per weight position.
      val inputFields = SArray.tabulate(weightCount)(pos => FieldName.create("field_" + pos))
      val dataDict = new DataDictionary
      val mining = new MiningSchema
      val tableYes = new RegressionTable(model.intercept).withTargetCategory("1")

      // Under LOGIT normalization the threshold goes through the inverse logistic function,
      // saturating at the extreme finite doubles when it lies outside (0, 1); otherwise the
      // raw threshold becomes the intercept.
      val logitNormalized = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val interceptForNo =
        if (!logitNormalized) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val tableNo = new RegressionTable(interceptForNo).withTargetCategory("0")

      val pmmlModel = new RegressionModel()
        .withFunctionName(MiningFunctionType.CLASSIFICATION)
        .withMiningSchema(mining)
        .withModelName(description)
        .withNormalizationMethod(normalizationMethod)
        .withRegressionTables(tableYes, tableNo)

      inputFields.zipWithIndex.foreach { case (input, pos) =>
        dataDict.withDataFields(new DataField(input, OpType.CONTINUOUS, DataType.DOUBLE))
        mining.withMiningFields(new MiningField(input).withUsageType(FieldUsageType.ACTIVE))
        tableYes.withNumericPredictors(new NumericPredictor(input, model.weights(pos)))
      }

      // Categorical target field.
      val output = FieldName.create("target")
      dataDict.withDataFields(new DataField(output, OpType.CATEGORICAL, DataType.STRING))
      mining.withMiningFields(new MiningField(output).withUsageType(FieldUsageType.TARGET))
      dataDict.withNumberOfFields(dataDict.getDataFields.size)

      pmml.setDataDictionary(dataDict)
      pmml.withModels(pmmlModel)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 11
Source File: GeneralizedLinearPMMLModelExport.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Exports `model` as a regression model inside the surrounding `pmml` document.
   *
   * The description is written to the header in every case. When `model.weights` is
   * non-empty, the weight at index `i` is published as a continuous double field `field_i`,
   * which is listed in the data dictionary, tagged ACTIVE in the mining schema, and
   * referenced by a numeric predictor holding that weight. A continuous double `target`
   * field tagged TARGET is appended afterwards. When the weights are empty, the method stops
   * after updating the header.
   *
   * @param model linear model from which the intercept and weights are read
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val dimension = model.weights.size
    if (dimension > 0) {
      // Pre-compute the field name for every weight index.
      val columns = SArray.tabulate(dimension)(j => FieldName.create("field_" + j))
      val declaredFields = new DataDictionary
      val fieldUsages = new MiningSchema
      val linearTerms = new RegressionTable(model.intercept)
      val regressionPmml = new RegressionModel()
        .setFunctionName(MiningFunctionType.REGRESSION)
        .setMiningSchema(fieldUsages)
        .setModelName(description)
        .addRegressionTables(linearTerms)

      columns.zipWithIndex.foreach { case (column, j) =>
        declaredFields.addDataFields(new DataField(column, OpType.CONTINUOUS, DataType.DOUBLE))
        fieldUsages.addMiningFields(new MiningField(column).setUsageType(FieldUsageType.ACTIVE))
        linearTerms.addNumericPredictors(new NumericPredictor(column, model.weights(j)))
      }

      // For completeness, also declare the predicted field.
      val predicted = FieldName.create("target")
      declaredFields.addDataFields(new DataField(predicted, OpType.CONTINUOUS, DataType.DOUBLE))
      fieldUsages.addMiningFields(
        new MiningField(predicted).setUsageType(FieldUsageType.TARGET))
      declaredFields.setNumberOfFields(declaredFields.getDataFields.size)

      pmml.setDataDictionary(declaredFields)
      pmml.addModels(regressionPmml)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 12
Source File: BinaryClassificationPMMLModelExport.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Exports the surrounding model as a binary classifier inside the `pmml` document.
   *
   * The description is written to the header in every case. When `model.weights` is
   * non-empty, the regression table for category "1" is given the model intercept and one
   * numeric predictor per weight (fields `field_0`, `field_1`, ...), and the table for
   * category "0" is given only an intercept computed from `threshold`. A categorical string
   * `target` field tagged TARGET is declared too. When the weights are empty, the method
   * stops after updating the header.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val dimension = model.weights.size
    if (dimension > 0) {
      // Pre-compute the field name for every weight index.
      val columns = SArray.tabulate(dimension)(j => FieldName.create("field_" + j))
      val declaredFields = new DataDictionary
      val fieldUsages = new MiningSchema
      val categoryOne = new RegressionTable(model.intercept).setTargetCategory("1")

      // For LOGIT normalization the threshold is passed through the inverse logistic
      // function, with thresholds outside (0, 1) pinned to the extreme finite doubles. Any
      // other normalization uses the threshold directly.
      val isLogit = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val categoryZeroIntercept =
        if (!isLogit) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val categoryZero = new RegressionTable(categoryZeroIntercept).setTargetCategory("0")

      val classificationPmml = new RegressionModel()
        .setFunctionName(MiningFunctionType.CLASSIFICATION)
        .setMiningSchema(fieldUsages)
        .setModelName(description)
        .setNormalizationMethod(normalizationMethod)
        .addRegressionTables(categoryOne, categoryZero)

      columns.zipWithIndex.foreach { case (column, j) =>
        declaredFields.addDataFields(new DataField(column, OpType.CONTINUOUS, DataType.DOUBLE))
        fieldUsages.addMiningFields(new MiningField(column).setUsageType(FieldUsageType.ACTIVE))
        categoryOne.addNumericPredictors(new NumericPredictor(column, model.weights(j)))
      }

      // Declare the predicted field as a categorical string.
      val predicted = FieldName.create("target")
      declaredFields.addDataFields(new DataField(predicted, OpType.CATEGORICAL, DataType.STRING))
      fieldUsages.addMiningFields(
        new MiningField(predicted).setUsageType(FieldUsageType.TARGET))
      declaredFields.setNumberOfFields(declaredFields.getDataFields.size)

      pmml.setDataDictionary(declaredFields)
      pmml.addModels(classificationPmml)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 13
Source File: GeneralizedLinearPMMLModelExport.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Writes a regression model for `model` into the surrounding `pmml` document.
   *
   * The header description is always assigned. For a model with one or more weights, weight
   * `i` is exposed as a continuous double field `field_i`: the field is added to the data
   * dictionary, given ACTIVE usage in the mining schema, and used as a numeric predictor
   * with that weight. A continuous double `target` field with TARGET usage is added last.
   * For a model with zero weights nothing else is written.
   *
   * @param model linear model providing the intercept and the weights
   */
  private def populateGeneralizedLinearPMML(model: GeneralizedLinearModel): Unit = {
    pmml.getHeader.setDescription(description)

    val numWeights = model.weights.size
    if (numWeights > 0) {
      // Names for the predictor fields, indexed like the weights.
      val predictorNames = SArray.tabulate(numWeights)(k => FieldName.create("field_" + k))
      val fieldCatalog = new DataDictionary
      val usageSchema = new MiningSchema
      val weightTable = new RegressionTable(model.intercept)
      val regression = new RegressionModel()
        .withFunctionName(MiningFunctionType.REGRESSION)
        .withMiningSchema(usageSchema)
        .withModelName(description)
        .withRegressionTables(weightTable)

      predictorNames.zipWithIndex.foreach { case (name, k) =>
        fieldCatalog.withDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        usageSchema.withMiningFields(new MiningField(name).withUsageType(FieldUsageType.ACTIVE))
        weightTable.withNumericPredictors(new NumericPredictor(name, model.weights(k)))
      }

      // Added for completeness: the field the model predicts.
      val targetName = FieldName.create("target")
      fieldCatalog.withDataFields(new DataField(targetName, OpType.CONTINUOUS, DataType.DOUBLE))
      usageSchema.withMiningFields(
        new MiningField(targetName).withUsageType(FieldUsageType.TARGET))
      fieldCatalog.withNumberOfFields(fieldCatalog.getDataFields.size)

      pmml.setDataDictionary(fieldCatalog)
      pmml.withModels(regression)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 14
Source File: BinaryClassificationPMMLModelExport.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.pmml.export

import scala.{Array => SArray}

import org.dmg.pmml._

import org.apache.spark.mllib.regression.GeneralizedLinearModel

  /**
   * Writes a binary classification model into the surrounding `pmml` document.
   *
   * The header description is always assigned. For a model with one or more weights, the
   * table for category "1" holds the model intercept and a numeric predictor per weight
   * (fields `field_0`, `field_1`, ...); the table for category "0" holds just an intercept
   * obtained from `threshold`. A categorical string `target` field with TARGET usage is
   * added as well. For a model with zero weights nothing else is written.
   */
  private def populateBinaryClassificationPMML(): Unit = {
    pmml.getHeader.setDescription(description)

    val numWeights = model.weights.size
    if (numWeights > 0) {
      // Names for the predictor fields, indexed like the weights.
      val predictorNames = SArray.tabulate(numWeights)(k => FieldName.create("field_" + k))
      val fieldCatalog = new DataDictionary
      val usageSchema = new MiningSchema
      val yesTable = new RegressionTable(model.intercept).withTargetCategory("1")

      // LOGIT normalization: convert the threshold with the inverse logistic function and
      // clamp to the extreme finite doubles outside (0, 1). Other normalizations keep the
      // threshold as is.
      val logit = RegressionNormalizationMethodType.LOGIT == normalizationMethod
      val noIntercept =
        if (!logit) threshold
        else if (threshold <= 0) Double.MinValue
        else if (threshold >= 1) Double.MaxValue
        else -math.log(1 / threshold - 1)
      val noTable = new RegressionTable(noIntercept).withTargetCategory("0")

      val classifier = new RegressionModel()
        .withFunctionName(MiningFunctionType.CLASSIFICATION)
        .withMiningSchema(usageSchema)
        .withModelName(description)
        .withNormalizationMethod(normalizationMethod)
        .withRegressionTables(yesTable, noTable)

      predictorNames.zipWithIndex.foreach { case (name, k) =>
        fieldCatalog.withDataFields(new DataField(name, OpType.CONTINUOUS, DataType.DOUBLE))
        usageSchema.withMiningFields(new MiningField(name).withUsageType(FieldUsageType.ACTIVE))
        yesTable.withNumericPredictors(new NumericPredictor(name, model.weights(k)))
      }

      // The predicted field is categorical.
      val targetName = FieldName.create("target")
      fieldCatalog.withDataFields(new DataField(targetName, OpType.CATEGORICAL, DataType.STRING))
      usageSchema.withMiningFields(
        new MiningField(targetName).withUsageType(FieldUsageType.TARGET))
      fieldCatalog.withNumberOfFields(fieldCatalog.getDataFields.size)

      pmml.setDataDictionary(fieldCatalog)
      pmml.withModels(classifier)
    }
  }
} // closes an enclosing scope whose opening is not part of this excerpt
Example 15
Source File: SparkPredictorEngine.scala From elasticsearch-prediction-spark with Apache License 2.0 | 5 votes |
package com.sdhu.elasticsearchprediction.spark

import com.mahisoft.elasticsearchprediction.plugin.engine.PredictorEngine
import com.mahisoft.elasticsearchprediction.plugin.domain.IndexValue
import com.mahisoft.elasticsearchprediction.plugin.exception.PredictionException

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.GeneralizedLinearModel

import java.util.Collection

/**
 * A [[PredictorEngine]] that scores index values with a Spark generalized linear model.
 *
 * The engine starts with a default `ModelData[M]()`; a model is only loaded when
 * [[readModel]] is called.
 *
 * @param readPath location handed to `spHelp.readSparkModel` when loading the model
 * @param spHelp   helper used to load the model data
 * @tparam M concrete generalized linear model type
 */
class SparkPredictorEngine[M <: GeneralizedLinearModel](
    val readPath: String,
    val spHelp: SparkModelHelpers[M])
  extends PredictorEngine {

  // Currently held model data; replaced by readModel().
  private var _model: ModelData[M] = ModelData[M]()

  /**
   * Converts `values` into a feature vector and returns the classifier's prediction.
   *
   * The conversion uses the model's categories map, or an empty map when none is present.
   *
   * @param values index values to score
   * @return the prediction of the held classifier
   * @throws PredictionException with message "Empty model" when no classifier is held
   */
  override def getPrediction(values: Collection[IndexValue]): Double =
    _model.clf match {
      case Some(classifier) =>
        val categories = _model.categoriesMap.getOrElse(Map[String, Double]())
        val features = ReadUtil.cIndVal2Vector(values, categories)
        classifier.predict(features)
      case None =>
        throw new PredictionException("Empty model")
    }

  /**
   * Loads model data from `readPath` through `spHelp`, stores it in this engine, and
   * returns it.
   */
  def readModel(): ModelData[M] = {
    val loaded = spHelp.readSparkModel(readPath)
    _model = loaded
    loaded
  }

  /** Returns the model data currently held by this engine. */
  def getModel: ModelData[M] = _model
}