Java Code Examples for weka.core.Utils#log2()
The following examples show how to use weka.core.Utils#log2().
Each example lists its source file, originating project, and license.
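For reference, Utils.log2(a) returns the base-2 logarithm of a; in the Weka source it is implemented as Math.log(a) / Math.log(2). A minimal usage sketch, assuming only weka.jar on the classpath (the class name Log2Demo is made up for illustration):

import weka.core.Utils;

public class Log2Demo {
  public static void main(String[] args) {
    System.out.println(Utils.log2(8.0)); // 3.0
    System.out.println(Utils.log2(1.0)); // 0.0
    // Entropy of a fair coin in bits: -(0.5*log2(0.5) + 0.5*log2(0.5)) = 1.0
    System.out.println(-2 * 0.5 * Utils.log2(0.5));
  }
}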
Example 1
Source File: Evaluation.java From tsml with GNU General Public License v3.0
/**
 * Calculate the entropy of the prior distribution.
 *
 * @return the entropy of the prior distribution
 * @throws Exception if the class is not nominal
 */
public final double priorEntropy() throws Exception {

  if (!m_ClassIsNominal) {
    throw new Exception("Can't compute entropy of class prior: "
      + "class numeric!");
  }

  if (m_NoPriors)
    return Double.NaN;

  double entropy = 0;
  for (int i = 0; i < m_NumClasses; i++) {
    entropy -= m_ClassPriors[i] / m_ClassPriorsSum
      * Utils.log2(m_ClassPriors[i] / m_ClassPriorsSum);
  }
  return entropy;
}
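As a sanity check on the loop above: a uniform prior over four classes should give log2(4) = 2 bits. A standalone sketch of the same computation, with a made-up counts array (drop it into any main method):

double[] classPriors = {1, 1, 1, 1}; // hypothetical uniform class counts
double priorsSum = 4;
double entropy = 0;
for (double prior : classPriors) {
  entropy -= prior / priorsSum * weka.core.Utils.log2(prior / priorsSum);
}
System.out.println(entropy); // 2.0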
Example 2
Source File: Id3.java From tsml with GNU General Public License v3.0
/**
 * Computes the entropy of a dataset.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

  double[] classCounts = new double[data.numClasses()];
  Enumeration instEnum = data.enumerateInstances();
  while (instEnum.hasMoreElements()) {
    Instance inst = (Instance) instEnum.nextElement();
    classCounts[(int) inst.classValue()]++;
  }

  double entropy = 0;
  for (int j = 0; j < data.numClasses(); j++) {
    if (classCounts[j] > 0) {
      entropy -= classCounts[j] * Utils.log2(classCounts[j]);
    }
  }
  entropy /= (double) data.numInstances();
  return entropy + Utils.log2(data.numInstances());
}
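Note the algebraic shortcut here: instead of normalizing each count first, the loop accumulates -sum(count_j * log2(count_j)), divides by the number of instances N, and adds log2(N). This equals the usual entropy -sum((count_j/N) * log2(count_j/N)), because log2(count_j/N) = log2(count_j) - log2(N) and the counts sum to N.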
Example 3
Source File: CollectiveTree.java From collective-classification-weka-package with GNU General Public License v3.0
/**
 * Performs the initialization and building; possible iterations happen here.
 *
 * @throws Exception if something goes wrong
 */
@Override
protected void build() throws Exception {
  // determine the number of features to be selected
  m_KValue = getNumFeatures();
  if (m_KValue < 1)
    m_KValue = (int) Utils.log2(m_Trainset.numAttributes()) + 1;

  // make sure the K value is in range
  if (m_KValue > m_Trainset.numAttributes() - 1)
    m_KValue = m_Trainset.numAttributes() - 1;

  // build the classifier
  m_Random = m_Trainset.getRandomNumberGenerator(getSeed());
  buildClassifier();
}
Example 4
Source File: Evaluation.java From tsml with GNU General Public License v3.0
/**
 * Updates stats for conditional density estimator based on current test
 * instance.
 *
 * @param classifier the conditional density estimator
 * @param classMissing the instance for which density is to be computed,
 *          without a class value
 * @param classValue the class value of this instance
 * @throws Exception if density could not be computed successfully
 */
protected void updateStatsForConditionalDensityEstimator(
  ConditionalDensityEstimator classifier, Instance classMissing,
  double classValue) throws Exception {

  if (m_PriorEstimator == null) {
    setNumericPriorsFromBuffer();
  }
  m_SumSchemeEntropy -= classifier.logDensity(classMissing, classValue)
    * classMissing.weight() / Utils.log2;
  m_SumPriorEntropy -= m_PriorEstimator.logDensity(classValue)
    * classMissing.weight() / Utils.log2;
}
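Note that this example uses Utils.log2 without parentheses: that is the public constant in weka.core.Utils holding Math.log(2), not the log2(double) method. Dividing by it converts the estimator's natural-log densities into base-2 logarithms, i.e. into bits.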
Example 5
Source File: InfoGainSplitMetric.java From tsml with GNU General Public License v3.0
@Override
public double getMetricRange(Map<String, WeightMass> preDist) {
  int numClasses = preDist.size();
  if (numClasses < 2) {
    numClasses = 2;
  }
  return Utils.log2(numClasses);
}
Example 6
Source File: BFTree.java From tsml with GNU General Public License v3.0
/**
 * Compute and return the entropy for a given class distribution of a node.
 *
 * @param dist the class distribution
 * @param total the total weight of the class distribution
 * @return the entropy of the class distribution
 */
protected double computeEntropy(double[] dist, double total) {
  if (total == 0)
    return 0;
  double entropy = 0;
  for (int i = 0; i < dist.length; i++) {
    if (dist[i] != 0)
      entropy -= dist[i] / total * Utils.log2(dist[i] / total);
  }
  return entropy;
}
Example 7
Source File: RuleStats.java From tsml with GNU General Public License v3.0
/**
 * The description length of the theory for a given rule. Computed as:
 *   0.5 * [||k|| + S(t, k, k/t)]
 * where k is the number of antecedents of the rule, t is the total number
 * of possible antecedents that could appear in a rule, ||k|| is the
 * universal prior for k, log2*(k), and
 * S(t, k, p) = -k*log2(p) - (t-k)*log2(1-p) is the subset encoding length.
 *
 * For details, see Quinlan: "MDL and categorical theories (Continued)", ML95.
 *
 * @param index the index of the given rule (assuming correct)
 * @return the theory DL, weighted if weight != 1.0
 */
public double theoryDL(int index) {

  double k = ((Rule) m_Ruleset.elementAt(index)).size();

  if (k == 0)
    return 0.0;

  double tdl = Utils.log2(k);
  if (k > 1)                       // approximation
    tdl += 2.0 * Utils.log2(tdl);  // of log2 star
  tdl += subsetDL(m_Total, k, k / m_Total);

  return MDL_THEORY_WEIGHT * REDUNDANCY_FACTOR * tdl;
}
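The two lines tagged "approximation of log2 star" compute the universal prior ||k|| as roughly log2(k) + 2 * log2(log2(k)) for k > 1. This appears to stand in for Rissanen's log2*(k), which in its exact form sums the positive terms of log2(k) + log2(log2(k)) + ...; the code doubles the second term as a cheap substitute for the remaining tail.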
Example 8
Source File: RuleStats.java From tsml with GNU General Public License v3.0
/**
 * The description length of the data given the parameters of the data
 * based on the ruleset.
 *
 * For details, see Quinlan: "MDL and categorical theories (Continued)", ML95.
 *
 * @param expFPOverErr the expected FP/(FP+FN)
 * @param cover the coverage
 * @param uncover the uncoverage
 * @param fp the number of false positives
 * @param fn the number of false negatives
 * @return the description length
 */
public static double dataDL(double expFPOverErr, double cover,
                            double uncover, double fp, double fn) {
  double totalBits = Utils.log2(cover + uncover + 1.0); // how much data?
  double coverBits, uncoverBits; // what's the error?
  double expErr;                 // expected FP or FN

  if (Utils.gr(cover, uncover)) {
    expErr = expFPOverErr * (fp + fn);
    coverBits = subsetDL(cover, fp, expErr / cover);
    uncoverBits = Utils.gr(uncover, 0.0)
      ? subsetDL(uncover, fn, fn / uncover) : 0.0;
  } else {
    expErr = (1.0 - expFPOverErr) * (fp + fn);
    coverBits = Utils.gr(cover, 0.0)
      ? subsetDL(cover, fp, fp / cover) : 0.0;
    uncoverBits = subsetDL(uncover, fn, expErr / uncover);
  }

  return totalBits + coverBits + uncoverBits;
}
Example 9
Source File: TunedRandomForest.java From tsml with GNU General Public License v3.0
@Override
public void setParametersFromIndex(int x) {
  tuneParameters = false;
  // Three parameters, evenly distributed, 1 to maxPerPara.
  // Note that if maxPerPara > numFeaturesInProblem we have a problem,
  // so an exception will be thrown later.
  paras = new int[3];
  if (x < 1 || x > maxPerPara * maxPerPara * maxPerPara) // invalid range
    throw new UnsupportedOperationException(
      "ERROR parameter index " + x + " out of range for TunedRandomForest");
  int numLevelsIndex = (x - 1) / (maxPerPara * maxPerPara);
  int numFeaturesIndex = ((x - 1) / maxPerPara) % maxPerPara;
  int numTreesIndex = x % maxPerPara;
  // Need to know the number of attributes
  if (numFeaturesInProblem == 0)
    throw new RuntimeException("Error in TunedRandomForest in "
      + "setParametersFromIndex: we do not know the number of attributes; "
      + "setNumFeaturesInProblem must be called before this");
  // Parameter 1: maximum tree depth, m_MaxDepth
  if (numLevelsIndex == 0)
    paras[0] = 0;
  else
    paras[0] = numLevelsIndex * (numFeaturesInProblem / maxPerPara);
  // Parameter 2: number of features
  if (numFeaturesIndex == 0)
    paras[1] = (int) Math.sqrt(numFeaturesInProblem);
  else if (numFeaturesIndex == 1)
    paras[1] = (int) Utils.log2(numFeaturesInProblem) + 1;
  else
    paras[1] = ((numFeaturesIndex - 1) * numFeaturesInProblem) / maxPerPara;
  // Parameter 3: number of trees
  if (numTreesIndex == 0)
    paras[2] = 10; // Weka default
  else
    paras[2] = 100 * numTreesIndex;
  setMaxDepth(paras[0]);
  setNumFeaturesForEachTree(paras[1]);
  setNumTrees(paras[2]);
  if (m_Debug)
    System.out.println("Index = " + x
      + " Num Features In Problem = " + numFeaturesInProblem
      + " Max Depth = " + paras[0]
      + " Num Features = " + paras[1]
      + " Num Trees = " + paras[2]);
}
Example 10
Source File: TSF.java From tsml with GNU General Public License v3.0
/**
 * Parses a given list of options to set the parameters of the classifier.
 * We use this for the tuning mechanism, setting parameters through setOptions.
 * <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -T
 *  Number of trees.</pre>
 *
 * <pre> -I
 *  Number of intervals to fit.</pre>
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String numTreesString = Utils.getOption('T', options);
  if (numTreesString.length() != 0) {
    numClassifiers = Integer.parseInt(numTreesString);
  }

  // Options here are a double between 0 and 1 (proportion of features),
  // the text string "sqrt" or "log", or an integer number
  String numFeaturesString = Utils.getOption('I', options);
  if (numFeaturesString.length() != 0) {
    try {
      if (numFeaturesString.equals("sqrt"))
        numIntervalsFinder = (numAtts) -> (int) Math.sqrt(numAtts);
      else if (numFeaturesString.equals("log"))
        numIntervalsFinder = (numAtts) -> (int) Utils.log2(numAtts) + 1;
      else {
        double d = Double.parseDouble(numFeaturesString);
        if (d <= 0)
          throw new Exception("proportion of features out of range 0 to 1");
        if (d <= 1)
          numIntervalsFinder = (numAtts) -> (int) (d * numAtts);
        else
          numIntervalsFinder = (numAtts) -> (int) d;
      }
    } catch (Exception e) {
      System.err.print("Error: invalid parameter passed to TSF setOptions "
        + "for the number of intervals. Setting to default. ");
      System.err.print("Value " + numFeaturesString
        + ". Permissible values: sqrt, log, or a double in the range 0...1");
      numIntervalsFinder = (numAtts) -> (int) Math.sqrt(numAtts);
    }
  } else {
    numIntervalsFinder = (numAtts) -> (int) Math.sqrt(numAtts);
  }
}
Example 11
Source File: C45Split.java From tsml with GNU General Public License v3.0
/**
 * Returns the coding cost for the split (used in the rule learner).
 */
public final double codingCost() {
  return Utils.log2(m_index);
}
Example 12
Source File: Discretize.java From tsml with GNU General Public License v3.0
/**
 * Test using Kononenko's MDL criterion.
 *
 * @param priorCounts the class counts prior to splitting
 * @param bestCounts the class counts of the best split
 * @param numInstances the number of instances
 * @param numCutPoints the number of candidate cut points
 * @return true if the split is acceptable
 */
private boolean KononenkosMDL(double[] priorCounts, double[][] bestCounts,
                              double numInstances, int numCutPoints) {

  double distPrior, instPrior, distAfter = 0, sum, instAfter = 0;
  double before, after;
  int numClassesTotal;

  // Number of classes occurring in the set
  numClassesTotal = 0;
  for (int i = 0; i < priorCounts.length; i++) {
    if (priorCounts[i] > 0) {
      numClassesTotal++;
    }
  }

  // Encode the distribution prior to the split
  distPrior = SpecialFunctions.log2Binomial(numInstances + numClassesTotal - 1,
    numClassesTotal - 1);

  // Encode the instances prior to the split
  instPrior = SpecialFunctions.log2Multinomial(numInstances, priorCounts);

  before = instPrior + distPrior;

  // Encode the distributions and instances after the split
  for (int i = 0; i < bestCounts.length; i++) {
    sum = Utils.sum(bestCounts[i]);
    distAfter += SpecialFunctions.log2Binomial(sum + numClassesTotal - 1,
      numClassesTotal - 1);
    instAfter += SpecialFunctions.log2Multinomial(sum, bestCounts[i]);
  }

  // Coding cost after the split
  after = Utils.log2(numCutPoints) + distAfter + instAfter;

  // Check if the split is to be accepted
  return (before > after);
}
Example 13
Source File: Discretize.java From tsml with GNU General Public License v3.0
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * @param priorCounts the class counts prior to splitting
 * @param bestCounts the class counts of the best split
 * @param numInstances the number of instances
 * @param numCutPoints the number of candidate cut points
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
                                   double[][] bestCounts,
                                   double numInstances, int numCutPoints) {

  double priorEntropy, entropy, gain;
  double entropyLeft, entropyRight, delta;
  int numClassesTotal, numClassesRight, numClassesLeft;

  // Compute the entropy before the split
  priorEntropy = ContingencyTables.entropy(priorCounts);

  // Compute the entropy after the split
  entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);

  // Compute the information gain
  gain = priorEntropy - entropy;

  // Number of classes occurring in the set
  numClassesTotal = 0;
  for (int i = 0; i < priorCounts.length; i++) {
    if (priorCounts[i] > 0) {
      numClassesTotal++;
    }
  }

  // Number of classes occurring in the left subset
  numClassesLeft = 0;
  for (int i = 0; i < bestCounts[0].length; i++) {
    if (bestCounts[0][i] > 0) {
      numClassesLeft++;
    }
  }

  // Number of classes occurring in the right subset
  numClassesRight = 0;
  for (int i = 0; i < bestCounts[1].length; i++) {
    if (bestCounts[1][i] > 0) {
      numClassesRight++;
    }
  }

  // Entropy of the left and right subsets
  entropyLeft = ContingencyTables.entropy(bestCounts[0]);
  entropyRight = ContingencyTables.entropy(bestCounts[1]);

  // Compute the terms for the MDL formula
  delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
    - (((double) numClassesTotal * priorEntropy)
       - (numClassesRight * entropyRight)
       - (numClassesLeft * entropyLeft));

  // Check if the split is to be accepted
  return (gain > (Utils.log2(numCutPoints) + delta) / (double) numInstances);
}
Example 14
Source File: CollectiveForest.java From collective-classification-weka-package with GNU General Public License v3.0
/**
 * Performs the initialization and building; possible iterations happen here.
 *
 * @throws Exception if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats stats;
  int i;

  // determine the number of features to be selected
  m_KValue = getNumFeatures();
  if (m_KValue < 1)
    m_KValue = (int) Utils.log2(m_Trainset.numAttributes()) + 1;

  // determine the class distribution
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++) {
    if (stats.totalCount > 0)
      m_ClassDistribution[i] = stats.nominalCounts[i] / stats.totalCount;
    else
      m_ClassDistribution[i] = 0;
  }

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration = (double) m_Testset.numInstances() / getFolds();
  if (getDebug())
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);

  // build the list of sorted test instances
  m_List = new RankedList(m_Testset, m_ClassDistribution);

  // build the classifier
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose()) {
      if (getCutOff() > 0)
        System.out.println("\nFold " + i + "/" + getFolds()
          + " (CutOff at " + getCutOff() + ")");
      else
        System.out.println("\nFold " + i + "/" + getFolds());
    }

    buildTrainSet(i);
    buildClassifier();

    // cut-off of folds reached?
    if ((i > 0) && (i == getCutOff()))
      break;
  }
}
Example 15
Source File: RuleStats.java From tsml with GNU General Public License v3.0
/**
 * Subset description length:
 *   S(t,k,p) = -k*log2(p) - (t-k)*log2(1-p)
 *
 * For details, see Quinlan: "MDL and categorical theories (Continued)", ML95.
 *
 * @param t the number of elements in a known set
 * @param k the number of elements in a subset
 * @param p the expected proportion of the subset known by the recipient
 * @return the subset description length
 */
public static double subsetDL(double t, double k, double p) {
  double rt = Utils.gr(p, 0.0) ? (-k * Utils.log2(p)) : 0.0;
  rt -= (t - k) * Utils.log2(1 - p);
  return rt;
}
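A quick worked example, with made-up values: for t = 4 known elements, k = 2 in the subset, and p = 0.5, S(4, 2, 0.5) = -2*log2(0.5) - (4-2)*log2(0.5) = 2 + 2 = 4 bits. Since the method is public static, this is checkable directly (assuming RuleStats is on the classpath):

System.out.println(RuleStats.subsetDL(4, 2, 0.5)); // 4.0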
Example 16
Source File: PMILexiconExpander.java From AffectiveTweets with GNU General Public License v3.0
@Override
protected Instances process(Instances instances) throws Exception {
  Instances result = getOutputFormat();

  this.calculateWordCounts(instances);

  String[] sortedWords = this.wordInfo.keySet().toArray(new String[0]);
  Arrays.sort(sortedWords);

  for (String word : sortedWords) {
    WordCount wordCount = this.wordInfo.get(word);
    if (wordCount.posCount + wordCount.negCount >= this.minFreq) {
      double posProb = wordCount.posCount / posCount;
      double negProb = wordCount.negCount / negCount;
      double semanticOrientation = Utils.log2(posProb) - Utils.log2(negProb);

      double[] values = new double[result.numAttributes()];

      int wordNameIndex = result.attribute("WORD_NAME").index();
      values[wordNameIndex] = result.attribute(wordNameIndex)
        .addStringValue(word);
      values[result.numAttributes() - 1] = semanticOrientation;

      Instance inst = new DenseInstance(1, values);
      inst.setDataset(result);
      result.add(inst);
    }
  }
  return result;
}
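The semanticOrientation value computed above is a pointwise-mutual-information-style score: log2(posProb) - log2(negProb) = log2(posProb / negProb), so a word that is proportionally more frequent in positive tweets scores positive, and vice versa. Note that a word with a zero count in either class yields an infinite score; the minFreq threshold on the combined count only partially guards against this.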