Java Code Examples for weka.core.Utils#isMissingValue()
The following examples show how to use weka.core.Utils#isMissingValue().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Evaluation.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the Matthews correlation coefficient averaged over all classes,
 * where each class is weighted by its row total in the confusion matrix.
 * Classes whose coefficient is a missing value add nothing to the weighted
 * sum, but their row totals still count towards the divisor.
 *
 * @return the weighted matthews correlation coefficient.
 */
public double weightedMatthewsCorrelation() {
  double[] rowTotals = new double[m_NumClasses];
  double grandTotal = 0;

  // Row total per class, plus the overall total used as the divisor.
  for (int row = 0; row < m_NumClasses; row++) {
    for (int col = 0; col < m_NumClasses; col++) {
      rowTotals[row] += m_ConfusionMatrix[row][col];
    }
    grandTotal += rowTotals[row];
  }

  double weightedSum = 0;
  for (int cls = 0; cls < m_NumClasses; cls++) {
    double mcc = matthewsCorrelationCoefficient(cls);
    if (Utils.isMissingValue(mcc)) {
      continue;
    }
    weightedSum += (mcc * rowTotals[cls]);
  }

  return weightedSum / grandTotal;
}
Example 2
Source File: AbstractClassifier.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Predicts the class memberships for a given instance by delegating to
 * classifyInstance(). For a nominal class the returned array has a single
 * 1.0 at the predicted class index, or is all zero when the prediction is
 * a missing value. For a numeric class the first element holds the
 * predicted value. Any other class type yields an all-zero array.
 * Note that a classifier MUST implement either this or classifyInstance().
 *
 * @param instance the instance to be classified
 * @return an array containing the estimated membership
 * probabilities of the test instance in each class
 * or the numeric prediction
 * @exception Exception if distribution could not be
 * computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probs = new double[instance.numClasses()];
  int classType = instance.classAttribute().type();

  if (classType == Attribute.NOMINAL) {
    double predicted = classifyInstance(instance);
    // A missing prediction leaves the distribution at all zeros.
    if (!Utils.isMissingValue(predicted)) {
      probs[(int) predicted] = 1.0;
    }
  } else if (classType == Attribute.NUMERIC) {
    probs[0] = classifyInstance(instance);
  }

  return probs;
}
Example 3
Source File: GaussianConditionalSufficientStats.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Feeds one observed attribute value into the estimator kept for the given
 * class and tracks the smallest and largest value seen for that class.
 * Missing attribute values are ignored.
 *
 * @param attVal the observed attribute value
 * @param classVal the class label the value was observed with
 * @param weight the weight passed on to the estimator
 */
@Override
public void update(double attVal, String classVal, double weight) {
  if (Utils.isMissingValue(attVal)) {
    return;
  }

  GaussianEstimator estimator = (GaussianEstimator) m_classLookup.get(classVal);
  if (estimator != null) {
    // Known class: widen the observed range where necessary.
    if (attVal < m_minValObservedPerClass.get(classVal)) {
      m_minValObservedPerClass.put(classVal, attVal);
    }
    if (attVal > m_maxValObservedPerClass.get(classVal)) {
      m_maxValObservedPerClass.put(classVal, attVal);
    }
  } else {
    // First value for this class: the value is both minimum and maximum.
    estimator = new GaussianEstimator();
    m_classLookup.put(classVal, estimator);
    m_minValObservedPerClass.put(classVal, attVal);
    m_maxValObservedPerClass.put(classVal, attVal);
  }

  estimator.addValue(attVal, weight);
}
Example 4
Source File: NominalConditionalSufficientStats.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Records one observed nominal attribute value for the given class.
 * A missing value only increases the missing-weight counter; any other
 * value is added to the value distribution kept for the class, which is
 * created on first use. The total weight is increased in both cases.
 *
 * @param attVal the observed attribute value (cast to int as the value index)
 * @param classVal the class label the value was observed with
 * @param weight the weight of the observation
 */
@SuppressWarnings("unchecked")
@Override
public void update(double attVal, String classVal, double weight) {
  if (Utils.isMissingValue(attVal)) {
    m_missingWeight += weight;
  } else {
    ValueDistribution valDist = (ValueDistribution) m_classLookup.get(classVal);
    if (valDist == null) {
      // First observation for this class: register a fresh distribution.
      valDist = new ValueDistribution();
      m_classLookup.put(classVal, valDist);
    }
    valDist.add((int) attVal, weight);
  }
  m_totalWeight += weight;
}
Example 5
Source File: Id3.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Renders the subtree rooted at this node as text. A leaf prints
 * ": null" when its class value is missing and ": " followed by the class
 * label otherwise. An inner node prints one line per attribute value,
 * prefixed by one "| " per level, followed by the recursively rendered
 * successor.
 *
 * @param level the level at which the tree is to be printed
 * @return the tree as string at the given level
 */
private String toString(int level) {
  StringBuffer out = new StringBuffer();

  if (m_Attribute != null) {
    for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
      out.append("\n");
      for (int depth = 0; depth < level; depth++) {
        out.append("| ");
      }
      out.append(m_Attribute.name() + " = " + m_Attribute.value(branch));
      out.append(m_Successors[branch].toString(level + 1));
    }
  } else if (Utils.isMissingValue(m_ClassValue)) {
    out.append(": null");
  } else {
    out.append(": " + m_ClassAttribute.value((int) m_ClassValue));
  }

  return out.toString();
}
Example 6
Source File: Evaluation.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the area under the ROC curve averaged over all classes, where
 * each class is weighted by its row total in the confusion matrix.
 * Classes whose AUC is a missing value add nothing to the weighted sum,
 * but their row totals still count towards the divisor.
 *
 * @return the weighted AUC.
 */
public double weightedAreaUnderROC() {
  double[] rowTotals = new double[m_NumClasses];
  double grandTotal = 0;

  // Row total per class, plus the overall total used as the divisor.
  for (int row = 0; row < m_NumClasses; row++) {
    for (int col = 0; col < m_NumClasses; col++) {
      rowTotals[row] += m_ConfusionMatrix[row][col];
    }
    grandTotal += rowTotals[row];
  }

  double weightedSum = 0;
  for (int cls = 0; cls < m_NumClasses; cls++) {
    double auc = areaUnderROC(cls);
    if (Utils.isMissingValue(auc)) {
      continue;
    }
    weightedSum += (auc * rowTotals[cls]);
  }

  return weightedSum / grandTotal;
}
Example 7
Source File: EuclidianDataObject.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the per-attribute distance term between two values.
 * Nominal attributes yield 1 when either value is missing or the two
 * value indices differ, and 0 otherwise. Numeric attributes yield 1 when
 * both values are missing, the normalized non-missing value when exactly
 * one is missing, and the (signed) difference of the normalized values
 * otherwise. All other attribute types yield 0.
 *
 * @param index of the attribute within the DataObject's instance
 * @param v value_1
 * @param v1 value_2
 * @return double norm-distance between value_1 and value_2
 */
private double computeDistance(int index, double v, double v1) {
  int type = getInstance().attribute(index).type();
  boolean firstMissing = Utils.isMissingValue(v);
  boolean secondMissing = Utils.isMissingValue(v1);

  if (type == Attribute.NOMINAL) {
    if (firstMissing || secondMissing || ((int) v != (int) v1)) {
      return 1;
    }
    return 0;
  }

  if (type == Attribute.NUMERIC) {
    if (firstMissing && secondMissing) {
      return 1;
    }
    if (firstMissing) {
      return norm(v1, index);
    }
    if (secondMissing) {
      return norm(v, index);
    }
    return norm(v, index) - norm(v1, index);
  }

  return 0;
}
Example 8
Source File: IademVFMLNumericAttributeClassObserver.java From moa with GNU General Public License v3.0 | 5 votes |
/**
 * Registers an observed attribute value for a class. Missing attribute
 * values are skipped; otherwise the weight is added to the class
 * distribution and the observation is forwarded to
 * observeAttributeClass().
 *
 * @param attValue the observed attribute value
 * @param classValue the index of the class the value was observed with
 * @param weight the weight of the observation
 */
@Override
public void addValue(double attValue, int classValue, double weight) {
  if (!Utils.isMissingValue(attValue)) {
    this.classDist.addToValue(classValue, weight);
    observeAttributeClass(attValue, classValue, weight);
  }
}
Example 9
Source File: IademGaussianNumericAttributeClassObserver.java From moa with GNU General Public License v3.0 | 5 votes |
/**
 * Registers an observed attribute value for a class. Missing attribute
 * values are skipped; otherwise the weight is added to the running value
 * count and to the class distribution, and the observation is forwarded
 * to observeAttributeClass().
 *
 * @param attValue the observed attribute value
 * @param classValue the index of the class the value was observed with
 * @param weight the weight of the observation
 */
@Override
public void addValue(double attValue, int classValue, double weight) {
  if (!Utils.isMissingValue(attValue)) {
    this.valueCount += weight;
    this.classDist.addToValue(classValue, weight);
    observeAttributeClass(attValue, classValue, weight);
  }
}
Example 10
Source File: Neighbors.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/**
 * Returns the class value for this neighborhood. When the underlying
 * instance has a class value, that value is returned directly. Otherwise
 * the stored class value is returned, after calling calculate() first if
 * it is still a missing value.
 *
 * @return the current class label
 */
public double getClassValue() {
  if (!getInstance().classIsMissing()) {
    return getInstance().classValue();
  }

  // Stored value has not been determined yet: compute it on demand.
  if (Utils.isMissingValue(m_ClassValue)) {
    calculate();
  }
  return m_ClassValue;
}
Example 11
Source File: Evaluation.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Updates all the statistics about a predictors performance for the current * test instance. * * @param predictedValue the numeric value the classifier predicts * @param instance the instance to be classified * @throws Exception if the class of the instance is not set */ protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception { if (!instance.classIsMissing()) { // Update stats m_WithClass += instance.weight(); if (Utils.isMissingValue(predictedValue)) { m_Unclassified += instance.weight(); return; } m_SumClass += instance.weight() * instance.classValue(); m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue(); m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue; m_SumPredicted += instance.weight() * predictedValue; m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue; updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()), instance.weight()); } else m_MissingClass += instance.weight(); if (m_pluginMetrics != null) { for (AbstractEvaluationMetric m : m_pluginMetrics) { if (m instanceof StandardEvaluationMetric) { ((StandardEvaluationMetric) m).updateStatsForPredictor( predictedValue, instance); } else if (m instanceof InformationTheoreticEvaluationMetric) { ((InformationTheoreticEvaluationMetric) m).updateStatsForPredictor( predictedValue, instance); } } } }
Example 12
Source File: IademGreenwaldKhannaNumericAttributeClassObserver.java From moa with GNU General Public License v3.0 | 5 votes |
@Override public void observeAttributeClass(double attVal, int classVal, double weight) { if (Utils.isMissingValue(attVal)) { } else { IademGreenwaldKhannaQuantileSummary valDist = (IademGreenwaldKhannaQuantileSummary) this.attValDistPerClass.get(classVal); if (valDist == null) { valDist = new IademGreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); this.attValDistPerClass.set(classVal, valDist); } // TODO: not taking weight into account valDist.insert(attVal); } }
Example 13
Source File: BFTree.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Outputs a tree at a certain level. * * @param level the level at which the tree is to be printed * @return a tree at a certain level. */ protected String toString(int level) { StringBuffer text = new StringBuffer(); // if leaf nodes if (m_Attribute == null) { if (Utils.isMissingValue(m_ClassValue)) { text.append(": null"); } else { double correctNum = Math.rint(m_Distribution[Utils.maxIndex(m_Distribution)]*100)/ 100.0; double wrongNum = Math.rint((Utils.sum(m_Distribution) - m_Distribution[Utils.maxIndex(m_Distribution)])*100)/100.0; String str = "(" + correctNum + "/" + wrongNum + ")"; text.append(": " + m_ClassAttribute.value((int) m_ClassValue)+ str); } } else { for (int j = 0; j < 2; j++) { text.append("\n"); for (int i = 0; i < level; i++) { text.append("| "); } if (j==0) { if (m_Attribute.isNumeric()) text.append(m_Attribute.name() + " < " + m_SplitValue); else text.append(m_Attribute.name() + "=" + m_SplitString); } else { if (m_Attribute.isNumeric()) text.append(m_Attribute.name() + " >= " + m_SplitValue); else text.append(m_Attribute.name() + "!=" + m_SplitString); } text.append(m_Successors[j].toString(level + 1)); } } return text.toString(); }
Example 14
Source File: HTML.java From tsml with GNU General Public License v3.0 | 4 votes |
protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception { int prec = m_NumDecimals; Instance withMissing = (Instance)inst.copy(); withMissing.setDataset(inst.dataset()); double predValue = 0; if (Utils.sum(dist) == 0) { predValue = Utils.missingValue(); } else { if (inst.classAttribute().isNominal()) { predValue = Utils.maxIndex(dist); } else { predValue = dist[0]; } } // index append("<tr>"); append("<td>" + (index+1) + "</td>"); if (inst.dataset().classAttribute().isNumeric()) { // actual if (inst.classIsMissing()) append("<td align=\"right\">" + "?" + "</td>"); else append("<td align=\"right\">" + Utils.doubleToString(inst.classValue(), prec) + "</td>"); // predicted if (Utils.isMissingValue(predValue)) append("<td align=\"right\">" + "?" + "</td>"); else append("<td align=\"right\">" + Utils.doubleToString(predValue, prec) + "</td>"); // error if (Utils.isMissingValue(predValue) || inst.classIsMissing()) append("<td align=\"right\">" + "?" + "</td>"); else append("<td align=\"right\">" + Utils.doubleToString(predValue - inst.classValue(), prec) + "</td>"); } else { // actual append("<td>" + ((int) inst.classValue()+1) + ":" + sanitize(inst.toString(inst.classIndex())) + "</td>"); // predicted if (Utils.isMissingValue(predValue)) append("<td>" + "?" + "</td>"); else append("<td>" + ((int) predValue+1) + ":" + sanitize(inst.dataset().classAttribute().value((int)predValue)) + "</td>"); // error? if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1)) append("<td>" + "+" + "</td>"); else append("<td>" + " " + "</td>"); // prediction/distribution if (m_OutputDistribution) { if (Utils.isMissingValue(predValue)) { append("<td>" + "?" 
+ "</td>"); } else { append("<td align=\"right\">"); for (int n = 0; n < dist.length; n++) { if (n > 0) append("</td><td align=\"right\">"); if (n == (int) predValue) append("*"); append(Utils.doubleToString(dist[n], prec)); } append("</td>"); } } else { if (Utils.isMissingValue(predValue)) append("<td align=\"right\">" + "?" + "</td>"); else append("<td align=\"right\">" + Utils.doubleToString(dist[(int)predValue], prec) + "</td>"); } } // attributes append(attributeValuesString(withMissing) + "</tr>\n"); }
Example 15
Source File: PlainText.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Store the prediction made by the classifier as a string. * * @param dist the distribution to use * @param inst the instance to generate text from * @param index the index in the dataset * @throws Exception if something goes wrong */ protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception { int width = 7 + m_NumDecimals; int prec = m_NumDecimals; Instance withMissing = (Instance)inst.copy(); withMissing.setDataset(inst.dataset()); double predValue = 0; if (Utils.sum(dist) == 0) { predValue = Utils.missingValue(); } else { if (inst.classAttribute().isNominal()) { predValue = Utils.maxIndex(dist); } else { predValue = dist[0]; } } // index append(Utils.padLeft("" + (index+1), 6)); if (inst.dataset().classAttribute().isNumeric()) { // actual if (inst.classIsMissing()) append(" " + Utils.padLeft("?", width)); else append(" " + Utils.doubleToString(inst.classValue(), width, prec)); // predicted if (Utils.isMissingValue(predValue)) append(" " + Utils.padLeft("?", width)); else append(" " + Utils.doubleToString(predValue, width, prec)); // error if (Utils.isMissingValue(predValue) || inst.classIsMissing()) append(" " + Utils.padLeft("?", width)); else append(" " + Utils.doubleToString(predValue - inst.classValue(), width, prec)); } else { // actual append(" " + Utils.padLeft(((int) inst.classValue()+1) + ":" + inst.toString(inst.classIndex()), width)); // predicted if (Utils.isMissingValue(predValue)) append(" " + Utils.padLeft("?", width)); else append(" " + Utils.padLeft(((int) predValue+1) + ":" + inst.dataset().classAttribute().value((int)predValue), width)); // error? 
if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1)) append(" " + " + "); else append(" " + " "); // prediction/distribution if (m_OutputDistribution) { if (Utils.isMissingValue(predValue)) { append(" " + "?"); } else { append(" "); for (int n = 0; n < dist.length; n++) { if (n > 0) append(","); if (n == (int) predValue) append("*"); append(Utils.doubleToString(dist[n], prec)); } } } else { if (Utils.isMissingValue(predValue)) append(" " + "?"); else append(" " + Utils.doubleToString(dist[(int)predValue], prec)); } } // attributes append(" " + attributeValuesString(withMissing) + "\n"); }
Example 16
Source File: Normalize.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * If the filter requires all instances prior to filtering, * output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws Exception if an error occurs * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) throw new IllegalStateException("No input instance format defined"); if (m_MinArray == null) { Instances input = getInputFormat(); // Compute minimums and maximums m_MinArray = new double[input.numAttributes()]; m_MaxArray = new double[input.numAttributes()]; for (int i = 0; i < input.numAttributes(); i++) m_MinArray[i] = Double.NaN; for (int j = 0; j < input.numInstances(); j++) { double[] value = input.instance(j).toDoubleArray(); for (int i = 0; i < input.numAttributes(); i++) { if (input.attribute(i).isNumeric() && (input.classIndex() != i)) { if (!Utils.isMissingValue(value[i])) { if (Double.isNaN(m_MinArray[i])) { m_MinArray[i] = m_MaxArray[i] = value[i]; } else { if (value[i] < m_MinArray[i]) m_MinArray[i] = value[i]; if (value[i] > m_MaxArray[i]) m_MaxArray[i] = value[i]; } } } } } // Convert pending input instances for (int i = 0; i < input.numInstances(); i++) convertInstance(input.instance(i)); } // Free memory flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
Example 17
Source File: XML.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Store the prediction made by the classifier as a string. * * @param dist the distribution to use * @param inst the instance to generate text from * @param index the index in the dataset * @throws Exception if something goes wrong */ protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception { int prec = m_NumDecimals; Instance withMissing = (Instance)inst.copy(); withMissing.setDataset(inst.dataset()); double predValue = 0; if (Utils.sum(dist) == 0) { predValue = Utils.missingValue(); } else { if (inst.classAttribute().isNominal()) { predValue = Utils.maxIndex(dist); } else { predValue = dist[0]; } } // opening tag append(" <" + TAG_PREDICTION + " " + ATT_INDEX + "=\"" + (index+1) + "\">\n"); if (inst.dataset().classAttribute().isNumeric()) { // actual append(" <" + TAG_ACTUAL_VALUE + ">"); if (inst.classIsMissing()) append("?"); else append(Utils.doubleToString(inst.classValue(), prec)); append("</" + TAG_ACTUAL_VALUE + ">\n"); // predicted append(" <" + TAG_PREDICTED_VALUE + ">"); if (inst.classIsMissing()) append("?"); else append(Utils.doubleToString(predValue, prec)); append("</" + TAG_PREDICTED_VALUE + ">\n"); // error append(" <" + TAG_ERROR + ">"); if (Utils.isMissingValue(predValue) || inst.classIsMissing()) append("?"); else append(Utils.doubleToString(predValue - inst.classValue(), prec)); append("</" + TAG_ERROR + ">\n"); } else { // actual append(" <" + TAG_ACTUAL_LABEL + " " + ATT_INDEX + "=\"" + ((int) inst.classValue()+1) + "\"" + ">"); append(sanitize(inst.toString(inst.classIndex()))); append("</" + TAG_ACTUAL_LABEL + ">\n"); // predicted append(" <" + TAG_PREDICTED_LABEL + " " + ATT_INDEX + "=\"" + ((int) predValue+1) + "\"" + ">"); if (Utils.isMissingValue(predValue)) append("?"); else append(sanitize(inst.dataset().classAttribute().value((int)predValue))); append("</" + TAG_PREDICTED_LABEL + ">\n"); // error? 
append(" <" + TAG_ERROR + ">"); if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1)) append(VAL_YES); else append(VAL_NO); append("</" + TAG_ERROR + ">\n"); // prediction/distribution if (m_OutputDistribution) { append(" <" + TAG_DISTRIBUTION + ">\n"); for (int n = 0; n < dist.length; n++) { append(" <" + TAG_CLASS_LABEL + " " + ATT_INDEX + "=\"" + (n+1) + "\""); if (!Utils.isMissingValue(predValue) && (n == (int) predValue)) append(" " + ATT_PREDICTED + "=\"" + VAL_YES + "\""); append(">"); append(Utils.doubleToString(dist[n], prec)); append("</" + TAG_CLASS_LABEL + ">\n"); } append(" </" + TAG_DISTRIBUTION + ">\n"); } else { append(" <" + TAG_PREDICTION + ">"); if (Utils.isMissingValue(predValue)) append("?"); else append(Utils.doubleToString(dist[(int)predValue], prec)); append("</" + TAG_PREDICTION + ">\n"); } } // attributes if (m_Attributes != null) append(attributeValuesString(withMissing)); // closing tag append(" </" + TAG_PREDICTION + ">\n"); }
Example 18
Source File: ReliefFAttributeEval.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Computes the difference between two given attribute * values. */ private double difference(int index, double val1, double val2) { switch (m_trainInstances.attribute(index).type()) { case Attribute.NOMINAL: // If attribute is nominal if (Utils.isMissingValue(val1) || Utils.isMissingValue(val2)) { return (1.0 - (1.0/((double)m_trainInstances. attribute(index).numValues()))); } else if ((int)val1 != (int)val2) { return 1; } else { return 0; } case Attribute.NUMERIC: // If attribute is numeric if (Utils.isMissingValue(val1) || Utils.isMissingValue(val2)) { if (Utils.isMissingValue(val1) && Utils.isMissingValue(val2)) { return 1; } else { double diff; if (Utils.isMissingValue(val2)) { diff = norm(val1, index); } else { diff = norm(val2, index); } if (diff < 0.5) { diff = 1.0 - diff; } return diff; } } else { return Math.abs(norm(val1, index) - norm(val2, index)); } default: return 0; } }
Example 19
Source File: FarthestFirst.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Computes the difference between two given attribute * values. */ protected double difference(int index, double val1, double val2) { switch (m_instances.attribute(index).type()) { case Attribute.NOMINAL: // If attribute is nominal if (Utils.isMissingValue(val1) || Utils.isMissingValue(val2) || ((int)val1 != (int)val2)) { return 1; } else { return 0; } case Attribute.NUMERIC: // If attribute is numeric if (Utils.isMissingValue(val1) || Utils.isMissingValue(val2)) { if (Utils.isMissingValue(val1) && Utils.isMissingValue(val2)) { return 1; } else { double diff; if (Utils.isMissingValue(val2)) { diff = norm(val1, index); } else { diff = norm(val2, index); } if (diff < 0.5) { diff = 1.0 - diff; } return diff; } } else { return norm(val1, index) - norm(val2, index); } default: return 0; } }
Example 20
Source File: InputMappedClassifier.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Builds an instance in the model's attribute layout from an incoming
 * instance. The attribute mapping is regenerated when no input header is
 * known yet, when the incoming header differs from the last known one, or
 * when no attribute map exists; unless suppressed, the mapping report is
 * then printed to standard output. Model attributes whose status is not
 * OK, whose incoming value is missing, or whose nominal value has no
 * match in the model are set to a missing value.
 *
 * @param incoming the instance to map
 * @return a new instance attached to the model header
 * @throws Exception declared by the signature; presumably raised while
 * regenerating the mapping
 */
public Instance constructMappedInstance(Instance incoming) throws Exception {
  boolean mappingIsStale = false;

  if (m_inputHeader == null || !m_inputHeader.equalHeaders(incoming.dataset())) {
    // First instance seen, or the incoming structure changed.
    m_inputHeader = incoming.dataset();
    mappingIsStale = true;
    m_initialTestStructureKnown = false;
  } else if (m_attributeMap == null) {
    mappingIsStale = true;
    m_initialTestStructureKnown = false;
  }

  if (mappingIsStale) {
    regenerateMapping();
    m_vals = null;

    if (!m_suppressMappingReport) {
      StringBuffer result = createMappingReport();
      System.out.println(result.toString());
    }
  }

  m_vals = new double[m_modelHeader.numAttributes()];

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    if (m_attributeStatus[i] != OK) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    Attribute modelAtt = m_modelHeader.attribute(i);
    double incomingVal = incoming.value(m_attributeMap[i]);

    if (Utils.isMissingValue(incomingVal)) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    if (modelAtt.isNumeric()) {
      m_vals[i] = incomingVal;
    } else if (modelAtt.isNominal()) {
      // Translate the incoming label index into the model's label index.
      int mapVal = m_nominalValueMap[i][(int) incomingVal];
      if (mapVal == NO_MATCH) {
        m_vals[i] = Utils.missingValue();
      } else {
        m_vals[i] = mapVal;
      }
    }
  }

  Instance newInst = new DenseInstance(incoming.weight(), m_vals);
  newInst.setDataset(m_modelHeader);

  return newInst;
}