Java Code Examples for weka.core.ContingencyTables#entropyConditionedOnRows()
The following examples show how to use
weka.core.ContingencyTables#entropyConditionedOnRows().
You can vote up the examples you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: DecisionStump.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Finds the best split for a nominal attribute and a nominal class
 * and returns the value of the splitting criterion for that split.
 *
 * The candidate splits are one-vs-rest on each attribute value; the
 * resulting three-way distribution is {value i, all other values,
 * missing values}. Side effects: updates m_SplitPoint and replaces
 * m_Distribution with the distribution of the best split found.
 *
 * @param index attribute index
 * @return value of criterion for the best split (lower is better)
 * @throws Exception if something goes wrong
 */
protected double findSplitNominalNominal(int index) throws Exception {

  double bestVal = Double.MAX_VALUE, currVal;
  // counts has one extra row (the last one) that accumulates the class
  // distribution of instances with a missing attribute value.
  double[][] counts = new double[m_Instances.attribute(index).numValues()
    + 1][m_Instances.numClasses()];
  double[] sumCounts = new double[m_Instances.numClasses()];
  // bestDist rows: [0] = chosen value, [1] = remaining values, [2] = missing.
  double[][] bestDist = new double[3][m_Instances.numClasses()];
  int numMissing = 0;

  // Compute counts for all the values
  for (int i = 0; i < m_Instances.numInstances(); i++) {
    Instance inst = m_Instances.instance(i);
    if (inst.isMissing(index)) {
      numMissing++;
      counts[m_Instances.attribute(index).numValues()]
        [(int)inst.classValue()] += inst.weight();
    } else {
      counts[(int)inst.value(index)][(int)inst.classValue()] += inst
        .weight();
    }
  }

  // Compute sum of counts over all non-missing attribute values
  for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      sumCounts[j] += counts[i][j];
    }
  }

  // Make split counts for each possible split and evaluate.
  // Row 2 (missing-value branch) is the same for every candidate split,
  // so it is copied in once up front.
  System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
                   m_Distribution[2], 0, m_Instances.numClasses());
  for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      m_Distribution[0][j] = counts[i][j];
      m_Distribution[1][j] = sumCounts[j] - counts[i][j];
    }
    currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
    if (currVal < bestVal) {
      bestVal = currVal;
      m_SplitPoint = (double)i;
      // Snapshot the winning distribution (m_Distribution is reused
      // on the next iteration).
      for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0,
                         m_Instances.numClasses());
      }
    }
  }

  // No missing values in training data: the missing-value branch would
  // otherwise be all zeros, so fall back to the overall class counts.
  if (numMissing == 0) {
    System.arraycopy(sumCounts, 0, bestDist[2], 0,
                     m_Instances.numClasses());
  }

  m_Distribution = bestDist;
  return bestVal;
}
Example 2
Source File: Discretize.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Test using Fayyad and Irani's MDL criterion. * * @param priorCounts * @param bestCounts * @param numInstances * @param numCutPoints * @return true if the splits is acceptable */ private boolean FayyadAndIranisMDL(double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) { double priorEntropy, entropy, gain; double entropyLeft, entropyRight, delta; int numClassesTotal, numClassesRight, numClassesLeft; // Compute entropy before split. priorEntropy = ContingencyTables.entropy(priorCounts); // Compute entropy after split. entropy = ContingencyTables.entropyConditionedOnRows(bestCounts); // Compute information gain. gain = priorEntropy - entropy; // Number of classes occuring in the set numClassesTotal = 0; for (int i = 0; i < priorCounts.length; i++) { if (priorCounts[i] > 0) { numClassesTotal++; } } // Number of classes occuring in the left subset numClassesLeft = 0; for (int i = 0; i < bestCounts[0].length; i++) { if (bestCounts[0][i] > 0) { numClassesLeft++; } } // Number of classes occuring in the right subset numClassesRight = 0; for (int i = 0; i < bestCounts[1].length; i++) { if (bestCounts[1][i] > 0) { numClassesRight++; } } // Entropy of the left and the right subsets entropyLeft = ContingencyTables.entropy(bestCounts[0]); entropyRight = ContingencyTables.entropy(bestCounts[1]); // Compute terms for MDL formula delta = Utils.log2(Math.pow(3, numClassesTotal) - 2) - (((double) numClassesTotal * priorEntropy) - (numClassesRight * entropyRight) - (numClassesLeft * entropyLeft)); // Check if split is to be accepted return (gain > (Utils.log2(numCutPoints) + delta) / (double)numInstances); }
Example 3
Source File: LPS.java From tsml with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion before the split minus its value afterwards.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the value of the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}
Example 4
Source File: RandomTree.java From tsml with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion before the split minus its value afterwards.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the value of the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}
Example 5
Source File: REPTree.java From tsml with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion before the split minus its value afterwards.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the value of the splitting criterion before the split
 * @return the gain after splitting
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}
Example 6
Source File: RandomTree.java From KEEL with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion before the split minus its value afterwards.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the value of the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}
Example 7
Source File: RandomTree.java From KEEL with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion before the split minus its value afterwards.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the value of the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}
Example 8
Source File: RandomTree.java From KEEL with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion before the split minus its value afterwards.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the value of the splitting criterion before the split
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}
Example 9
Source File: CollectiveTree.java From collective-classification-weka-package with GNU General Public License v3.0 | 2 votes |
/**
 * Computes the gain achieved by a split: the value of the splitting
 * criterion (prior value) minus the conditional entropy after the split.
 *
 * @param dist the class distributions of the branches after the split
 * @param priorVal the prior val
 * @return the gain
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitVal = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitVal;
}