Java Code Examples for weka.core.Utils#gr()
The following examples show how to use
weka.core.Utils#gr().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MINND.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Compute the target function to minimize in gradient descent * The formula is:<br/> * 1/2*sum[i=1..p](f(X, Xi)-var(Y, Yi))^2 <p/> * where p is the number of exemplars and Y is the class label. * In the case of X=MU, f() is the Euclidean distance between two * exemplars together with the related weights and var() is * sqrt(numDimension)*(Y-Yi) where Y-Yi is either 0 (when Y==Yi) * or 1 (Y!=Yi) * * @param x the weights of the exemplar in question * @param rowpos row index of x in X * @param Y the observed class label * @return the result of the target function */ public double target(double[] x, double[][] X, int rowpos, double[] Y){ double y = Y[rowpos], result=0; for(int i=0; i < X.length; i++){ if((i != rowpos) && (X[i] != null)){ double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1); double f=0; for(int j=0; j < m_Dimension; j++) if(Utils.gr(m_Variance[rowpos][j], 0.0)){ f += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]); //System.out.println("i:"+i+" j: "+j+" row: "+rowpos); } f = Math.sqrt(f); //System.out.println("???distance between "+rowpos+" and "+i+": "+f+"|y:"+y+" vs "+Y[i]); if(Double.isInfinite(f)) System.exit(1); result += 0.5 * (f - var) * (f - var); } } //System.out.println("???target: "+result); return result; }
Example 2
Source File: LMT.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Classifies an instance. * * @param instance the instance to classify * @return the classification * @throws Exception if instance can't be classified successfully */ public double classifyInstance(Instance instance) throws Exception { double maxProb = -1; int maxIndex = 0; //classify by maximum probability double[] probs = distributionForInstance(instance); for (int j = 0; j < instance.numClasses(); j++) { if (Utils.gr(probs[j], maxProb)) { maxIndex = j; maxProb = probs[j]; } } return (double)maxIndex; }
Example 3
Source File: FT.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Classifies an instance. * * @param instance the instance to classify * @return the classification * @throws Exception if instance can't be classified successfully */ public double classifyInstance(Instance instance) throws Exception { double maxProb = -1; int maxIndex = 0; //classify by maximum probability double[] probs = distributionForInstance(instance); for (int j = 0; j < instance.numClasses(); j++) { if (Utils.gr(probs[j], maxProb)) { maxIndex = j; maxProb = probs[j]; } } return (double)maxIndex; }
Example 4
Source File: EntropySplitCrit.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Computes entropy of test distribution with respect to training distribution. */ public final double splitCritValue(Distribution train, Distribution test) { double result = 0; int numClasses = 0; int i, j; // Find out relevant number of classes for (j = 0; j < test.numClasses(); j++) if (Utils.gr(train.perClass(j), 0) || Utils.gr(test.perClass(j), 0)) numClasses++; // Compute entropy of test data with respect to training data for (i = 0; i < test.numBags(); i++) if (Utils.gr(test.perBag(i),0)) { for (j = 0; j < test.numClasses(); j++) if (Utils.gr(test.perClassPerBag(i, j), 0)) result -= test.perClassPerBag(i, j)* Math.log(train.perClassPerBag(i, j) + 1); result += test.perBag(i) * Math.log(train.perBag(i) + numClasses); } return result / log2; }
Example 5
Source File: ClassifierDecList.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Classifies an instance by taking the class with the largest probability
 * reported by getProbs.
 *
 * @param instance the instance to classify
 * @return the index of the most probable class, or -1.0 when the largest
 *         probability equals zero (per Utils.eq)
 * @exception Exception if something goes wrong
 */
public double classifyInstance(Instance instance) throws Exception {
  int bestClass = 0;
  double bestProb = -1;

  for (int c = 0; c < instance.numClasses(); c++) {
    double prob = getProbs(c, instance, 1);
    if (Utils.gr(prob, bestProb)) {
      bestClass = c;
      bestProb = prob;
    }
  }

  return Utils.eq(bestProb, 0) ? -1.0 : (double) bestClass;
}
Example 6
Source File: NominalToBinary.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes, for every nominal attribute, the weighted average class value
 * of each attribute value and stores the sort order of those averages in
 * m_Indices. Entries for non-nominal attributes keep their empty arrays.
 */
private void computeAverageClassValues() {
  double[][] avgClassValues = new double[getInputFormat().numAttributes()][0];
  m_Indices = new int[getInputFormat().numAttributes()][0];

  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (!att.isNominal()) {
      continue;
    }

    avgClassValues[j] = new double[att.numValues()];
    double[] weightPerValue = new double[att.numValues()];

    // Sum weights and weighted class values per attribute value, ignoring
    // instances whose class or attribute value is missing.
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
      Instance instance = getInputFormat().instance(i);
      if (instance.classIsMissing() || instance.isMissing(j)) {
        continue;
      }
      int valueIndex = (int) instance.value(j);
      weightPerValue[valueIndex] += instance.weight();
      avgClassValues[j][valueIndex] += instance.weight() * instance.classValue();
    }

    double classValueSum = Utils.sum(avgClassValues[j]);
    double totalWeight = Utils.sum(weightPerValue);
    if (Utils.gr(totalWeight, 0)) {
      for (int k = 0; k < att.numValues(); k++) {
        if (Utils.gr(weightPerValue[k], 0)) {
          avgClassValues[j][k] /= weightPerValue[k];
        } else {
          // Values with no weight fall back to the overall average.
          avgClassValues[j][k] = classValueSum / totalWeight;
        }
      }
    }

    m_Indices[j] = Utils.sort(avgClassValues[j]);
  }
}
Example 7
Source File: Distribution.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Returns class with highest frequency over all bags.
 *
 * @return the index into m_perClass holding the largest count; 0 when no
 *         count is greater than zero (per Utils.gr)
 */
public final int maxClass() {
  int best = 0;
  double bestCount = 0;

  for (int c = 0; c < m_perClass.length; c++) {
    double count = m_perClass[c];
    if (Utils.gr(count, bestCount)) {
      bestCount = count;
      best = c;
    }
  }

  return best;
}
Example 8
Source File: SMO.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Quick and dirty check whether the quadratic programming problem is solved.
 * Prints the sum of y(i) * alpha(i) and every violated KKT condition to
 * System.err.
 *
 * @throws Exception if checking fails
 */
protected void checkClassifier() throws Exception {
  double weightedSum = 0;
  for (int i = 0; i < m_alpha.length; i++) {
    if (m_alpha[i] > 0) {
      weightedSum += m_class[i] * m_alpha[i];
    }
  }
  System.err.println("Sum of y(i) * alpha(i): " + weightedSum);

  for (int i = 0; i < m_alpha.length; i++) {
    double output = SVMOutput(i, m_data.instance(i));

    // Condition 1: alpha equal to zero
    if (Utils.eq(m_alpha[i], 0)
        && Utils.sm(m_class[i] * output, 1)) {
      System.err.println("KKT condition 1 violated: " + m_class[i] * output);
    }

    // Condition 2: alpha strictly between zero and C times instance weight
    if (Utils.gr(m_alpha[i], 0)
        && Utils.sm(m_alpha[i], m_C * m_data.instance(i).weight())
        && !Utils.eq(m_class[i] * output, 1)) {
      System.err.println("KKT condition 2 violated: " + m_class[i] * output);
    }

    // Condition 3: alpha equal to C times instance weight
    if (Utils.eq(m_alpha[i], m_C * m_data.instance(i).weight())
        && Utils.gr(m_class[i] * output, 1)) {
      System.err.println("KKT condition 3 violated: " + m_class[i] * output);
    }
  }
}
Example 9
Source File: MISMO.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Quick and dirty check whether the quadratic programming problem is solved.
 * Writes the sum of y(i) * alpha(i), followed by a line for each violated
 * KKT condition, to System.err.
 *
 * @throws Exception if something goes wrong
 */
protected void checkClassifier() throws Exception {
  double total = 0;
  int idx = 0;
  while (idx < m_alpha.length) {
    if (m_alpha[idx] > 0) {
      total += m_class[idx] * m_alpha[idx];
    }
    idx++;
  }
  System.err.println("Sum of y(i) * alpha(i): " + total);

  for (int i = 0; i < m_alpha.length; i++) {
    double output = SVMOutput(i, m_data.instance(i));

    boolean alphaIsZero = Utils.eq(m_alpha[i], 0);
    if (alphaIsZero && Utils.sm(m_class[i] * output, 1)) {
      System.err.println("KKT condition 1 violated: " + m_class[i] * output);
    }

    // The upper bound is only evaluated when alpha is greater than zero.
    boolean alphaInside = Utils.gr(m_alpha[i], 0)
      && Utils.sm(m_alpha[i], m_C * m_data.instance(i).weight());
    if (alphaInside && !Utils.eq(m_class[i] * output, 1)) {
      System.err.println("KKT condition 2 violated: " + m_class[i] * output);
    }

    boolean alphaAtBound = Utils.eq(m_alpha[i], m_C * m_data.instance(i).weight());
    if (alphaAtBound && Utils.gr(m_class[i] * output, 1)) {
      System.err.println("KKT condition 3 violated: " + m_class[i] * output);
    }
  }
}
Example 10
Source File: MakeDecList.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Returns the class distribution for an instance. * * @exception Exception if distribution can't be computed */ public double[] distributionForInstance(Instance instance) throws Exception { double [] currentProbs = null; double [] sumProbs; double currentWeight, weight = 1; int i,j; // Get probabilities. sumProbs = new double [instance.numClasses()]; i = 0; while (Utils.gr(weight,0)){ currentWeight = ((ClassifierDecList)theRules.elementAt(i)).weight(instance); if (Utils.gr(currentWeight,0)) { currentProbs = ((ClassifierDecList)theRules.elementAt(i)). distributionForInstance(instance); for (j = 0; j < sumProbs.length; j++) sumProbs[j] += weight*currentProbs[j]; weight = weight*(1-currentWeight); } i++; } return sumProbs; }
Example 11
Source File: ReplaceMissingValues.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * If the filter requires all instances prior to filtering, * output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_ModesAndMeans == null) { // Compute modes and means double sumOfWeights = getInputFormat().sumOfWeights(); double[][] counts = new double[getInputFormat().numAttributes()][]; for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).isNominal()) { counts[i] = new double[getInputFormat().attribute(i).numValues()]; if (counts[i].length > 0) counts[i][0] = sumOfWeights; } } double[] sums = new double[getInputFormat().numAttributes()]; for (int i = 0; i < sums.length; i++) { sums[i] = sumOfWeights; } double[] results = new double[getInputFormat().numAttributes()]; for (int j = 0; j < getInputFormat().numInstances(); j++) { Instance inst = getInputFormat().instance(j); for (int i = 0; i < inst.numValues(); i++) { if (!inst.isMissingSparse(i)) { double value = inst.valueSparse(i); if (inst.attributeSparse(i).isNominal()) { if (counts[inst.index(i)].length > 0) { counts[inst.index(i)][(int)value] += inst.weight(); counts[inst.index(i)][0] -= inst.weight(); } } else if (inst.attributeSparse(i).isNumeric()) { results[inst.index(i)] += inst.weight() * inst.valueSparse(i); } } else { if (inst.attributeSparse(i).isNominal()) { if (counts[inst.index(i)].length > 0) { counts[inst.index(i)][0] -= inst.weight(); } } else if (inst.attributeSparse(i).isNumeric()) { sums[inst.index(i)] -= inst.weight(); } } } } m_ModesAndMeans = new double[getInputFormat().numAttributes()]; for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).isNominal()) { if 
(counts[i].length == 0) m_ModesAndMeans[i] = Utils.missingValue(); else m_ModesAndMeans[i] = (double)Utils.maxIndex(counts[i]); } else if (getInputFormat().attribute(i).isNumeric()) { if (Utils.gr(sums[i], 0)) { m_ModesAndMeans[i] = results[i] / sums[i]; } } } // Convert pending input instances for(int i = 0; i < getInputFormat().numInstances(); i++) { convertInstance(getInputFormat().instance(i)); } } // Free memory flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
Example 12
Source File: YATSI.java From collective-classification-weka-package with GNU General Public License v3.0 | 4 votes |
/** * internal function for determining the class distribution for an instance, * will be overridden by derived classes. <br/> * * @param instance the instance to get the distribution for * @return the distribution for the given instance * @throws Exception if something goes wrong */ @Override protected double[] getDistribution(Instance instance) throws Exception { int index; int i; double[] result; Instances neighbors; Instance inst; double[] count; double[] countNum; int labelIndex; result = null; // find instance index = m_Data.indexOf(instance); if (index > -1) { // get neighbors neighbors = m_NNSearch.kNearestNeighbours( m_Data.get(index), m_KNNDetermined); // count class label count = new double[neighbors.numClasses()]; countNum = new double[neighbors.numClasses()]; for (i = 0; i < neighbors.numInstances(); i++) { inst = neighbors.instance(i); if (!inst.classIsMissing()) { count[(int) inst.classValue()] += inst.weight(); countNum[(int) inst.classValue()]++; } } // build result result = new double[instance.numClasses()]; for (i = 0; i < result.length; i++) result[i] = count[i]; if (Utils.gr(Utils.sum(result), 0)) Utils.normalize(result); else System.out.println( "No summed up weights: " + instance + ", counts=" + Utils.arrayToString(countNum)); labelIndex = Utils.maxIndex(count); // is it a clear-cut distribution? if (!Utils.eq(Utils.sum(count) - count[labelIndex], 0)) m_ClearCutDistribution++; // did the label change due to weights? if (Utils.maxIndex(countNum) != labelIndex) m_WeightFlips++; } else { throw new Exception("Cannot find instance: " + instance + "\n" + " -> pos=" + index + " = " + m_Data.get(StrictMath.abs(index))); } return result; }
Example 13
Source File: PairedStats.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Calculates the derived statistics (significance etc). Updates
 * correlation, differencesProbability and differencesSignificance after
 * refreshing the derived values of the x, y and difference statistics.
 */
public void calculateDerived() {
  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  // Correlation stays NaN unless both standard deviations are known and
  // the x standard deviation is non-zero.
  correlation = Double.NaN;
  boolean stdDevsKnown = !Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev);
  if (stdDevsKnown && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
      / (xStats.sumSq - xStats.sum * xStats.mean);
    if (Utils.eq(yStats.stdDev, 0)) {
      correlation = 1.0;
    } else {
      correlation = slope * xStats.stdDev / yStats.stdDev;
    }
  }

  if (!Utils.gr(differencesStats.stdDev, 0)) {
    // No spread in the differences
    if (differencesStats.sumSq == 0) {
      differencesProbability = 1.0;
    } else {
      differencesProbability = 0.0;
    }
  } else {
    double tval = differencesStats.mean * Math.sqrt(count) / differencesStats.stdDev;
    if (m_degreesOfFreedom >= 1) {
      differencesProbability =
        Statistics.FProbability(tval * tval, 1, m_degreesOfFreedom);
    } else if (count > 1) {
      differencesProbability =
        Statistics.FProbability(tval * tval, 1, (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  }

  // Sign of the significance follows the ordering of the two means.
  differencesSignificance = 0;
  if (differencesProbability <= sigLevel) {
    if (xStats.mean > yStats.mean) {
      differencesSignificance = 1;
    } else {
      differencesSignificance = -1;
    }
  }
}
Example 14
Source File: ConjunctiveRule.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Builds a single rule learner with REP dealing with nominal classes or * numeric classes. * For nominal classes, this rule learner predicts a distribution on * the classes. * For numeric classes, this learner predicts a single value. * * @param instances the training data * @throws Exception if classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class Instances data = new Instances(instances); data.deleteWithMissingClass(); if(data.numInstances() < m_Folds) throw new Exception("Not enough data for REP."); m_ClassAttribute = data.classAttribute(); if(m_ClassAttribute.isNominal()) m_NumClasses = m_ClassAttribute.numValues(); else m_NumClasses = 1; m_Antds = new FastVector(); m_DefDstr = new double[m_NumClasses]; m_Cnsqt = new double[m_NumClasses]; m_Targets = new FastVector(); m_Random = new Random(m_Seed); if(m_NumAntds != -1){ grow(data); } else{ data.randomize(m_Random); // Split data into Grow and Prune data.stratify(m_Folds); Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random); Instances pruneData=data.testCV(m_Folds, m_Folds-1); grow(growData); // Build this rule prune(pruneData); // Prune this rule } if(m_ClassAttribute.isNominal()){ Utils.normalize(m_Cnsqt); if(Utils.gr(Utils.sum(m_DefDstr), 0)) Utils.normalize(m_DefDstr); } }
Example 15
Source File: MINND.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Calculates the Kullback Leibler distance between two normal
 * distributions whose dimensions are treated independently.
 * Kullback Leibler distance = integral{f(X)ln(f(X)/g(X))}; the natural
 * logarithm is used here.
 *
 * With P dimensions, f(X) the first and g(X) the second distribution:
 * Kullback = sum[1..P](ln(SIGMA2/SIGMA1)) +
 *            sum[1..P](SIGMA1^2 / (2*(SIGMA2^2))) +
 *            sum[1..P]((MU1-MU2)^2 / (2*(SIGMA2^2))) -
 *            P/2
 * In the code the squared mean difference is additionally multiplied by
 * m_Change[pos][dimension], and only dimensions where both variances are
 * greater than zero (per Utils.gr) contribute.
 *
 * @param mu1 mu of the first normal distribution
 * @param mu2 mu of the second normal distribution
 * @param var1 variance(SIGMA^2) of the first normal distribution
 * @param var2 variance(SIGMA^2) of the second normal distribution
 * @param pos row of m_Change whose entries scale the mean term
 * @return the Kullback distance of two distributions
 */
public double kullback(double[] mu1, double[] mu2,
                       double[] var1, double[] var2, int pos) {
  double distance = 0;

  for (int d = 0; d < mu1.length; d++) {
    if (!Utils.gr(var1[d], 0) || !Utils.gr(var2[d], 0)) {
      continue;
    }

    double logRatio = Math.log(Math.sqrt(var2[d] / var1[d]));
    double varRatio = var1[d] / (2.0 * var2[d]);
    double meanTerm = m_Change[pos][d] * (mu1[d] - mu2[d]) * (mu1[d] - mu2[d])
      / (2.0 * var2[d]);

    distance += (logRatio + varRatio + meanTerm - 0.5);
  }

  return distance;
}
Example 16
Source File: MINND.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Pre-process the given exemplar according to the other exemplars
 * in the given exemplars. It also updates noise data statistics
 * (m_NoiseM and m_NoiseV at the exemplar's position).
 *
 * @param data the whole exemplars
 * @param pos the position of given exemplar in data
 * @return the processed exemplar; the original exemplar is returned
 *         untouched when its class value is 0
 * @throws Exception if the returned exemplar is wrong
 */
public Instance preprocess(Instances data, int pos) throws Exception {
  Instance before = data.instance(pos);

  // Exemplars of class 0 are passed through and carry no noise statistics.
  if ((int) before.classValue() == 0) {
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
    return before;
  }

  Instances keptBag = before.attribute(1).relation().stringFreeStructure();
  Instances noiseBag = before.attribute(1).relation().stringFreeStructure();

  Instances newData = m_Attributes;
  Instance after = new DenseInstance(before.numAttributes());
  Instance noises = new DenseInstance(before.numAttributes());
  after.setDataset(newData);
  noises.setDataset(newData);

  for (int g = 0; g < before.relationalValue(1).numInstances(); g++) {
    Instance datum = before.relationalValue(1).instance(g);

    // Distance from this datum to every other exemplar; the exemplar
    // itself is excluded by an infinite distance.
    double[] dists = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
      dists[i] = (i != pos)
        ? distance(datum, m_Mean[i], m_Variance[i], i)
        : Double.POSITIVE_INFINITY;
    }

    // Vote among the m_Select closest exemplars (votes start at zero).
    int[] votes = new int[m_NumClasses];
    for (int picked = 0; picked < m_Select; picked++) {
      int nearest = Utils.minIndex(dists);
      votes[(int) m_Class[nearest]]++;
      dists[nearest] = Double.POSITIVE_INFINITY;
    }

    int predicted = Utils.maxIndex(votes);
    if ((int) before.classValue() == predicted) {
      keptBag.add(datum);
    } else {
      noiseBag.add(datum);
    }
  }

  // Register the noise relation first, then the kept relation.
  int relationValue = noises.attribute(1).addRelation(noiseBag);
  noises.setValue(0, before.value(0));
  noises.setValue(1, relationValue);
  noises.setValue(2, before.classValue());

  relationValue = after.attribute(1).addRelation(keptBag);
  after.setValue(0, before.value(0));
  after.setValue(1, relationValue);
  after.setValue(2, before.classValue());

  if (!Utils.gr(noises.relationalValue(1).sumOfWeights(), 0)) {
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
  } else {
    for (int i = 0; i < m_Dimension; i++) {
      m_NoiseM[pos][i] = noises.relationalValue(1).meanOrMode(i);
      m_NoiseV[pos][i] = noises.relationalValue(1).variance(i);
      // A zero variance is replaced by m_ZERO.
      if (Utils.eq(m_NoiseV[pos][i], 0.0)) {
        m_NoiseV[pos][i] = m_ZERO;
      }
    }
  }

  return after;
}
Example 17
Source File: CollectiveTree.java From collective-classification-weka-package with GNU General Public License v3.0 | 4 votes |
/** * determines the distribution of the instances with a non-missing value * at the given attribute position. * @param data the instances to work on * @param indices the sorted indices * @param att the attribute to determine the distribution for * @return the distribution */ protected double[] determineAttributeDistribution( Instances data, int[] indices, int att) { double[] result; int i; Instance inst; int count; double[] values; double median; // nominal attribute if (data.attribute(att).isNominal()) { result = new double[data.attribute(att).numValues()]; // determine attribute distribution (necessary to distribute instances // with no class and missing attribute) for (i = 0; i < indices.length; i++) { inst = data.instance(indices[i]); if (inst.isMissing(att)) break; result[(int) inst.value(att)] += inst.weight(); } } // numeric attribute else { result = new double[2]; // less or greater/equal than median // determine number of instances w/o missing attribute count = 0; for (i = 0; i < indices.length; i++) { inst = data.instance(indices[i]); if (inst.isMissing(att)) break; count++; } // determine median values = new double[count]; for (i = 0; i < count; i++) { inst = data.instance(indices[i]); values[i] = inst.value(att); } if (values.length == 0) median = 0; else if (values.length == 1) median = values[0]; else median = Utils.kthSmallestValue(values, values.length / 2); // disitribute for (i = 0; i < count; i++) { inst = data.instance(indices[i]); if (Utils.sm(inst.value(att), median)) result[0] += inst.weight(); else result[1] += inst.weight(); } } if (Utils.gr(Utils.sum(result), 0)) Utils.normalize(result); return result; }
Example 18
Source File: MINND.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Use gradient descent to distort the MU parameter for * the exemplar. The exemplar can be in the specified row in the * given matrix, which has numExemplar rows and numDimension columns; * or not in the matrix. * * @param row the given row index * @param mean */ public void findWeights(int row, double[][] mean){ double[] neww = new double[m_Dimension]; double[] oldw = new double[m_Dimension]; System.arraycopy(m_Change[row], 0, neww, 0, m_Dimension); //for(int z=0; z<m_Dimension; z++) //System.out.println("mu("+row+"): "+origin[z]+" | "+newmu[z]); double newresult = target(neww, mean, row, m_Class); double result = Double.POSITIVE_INFINITY; double rate= 0.05; if(m_Rate != -1) rate = m_Rate; //System.out.println("???Start searching ..."); search: while(Utils.gr((result-newresult), m_STOP)){ // Full step oldw = neww; neww= new double[m_Dimension]; double[] delta = delta(oldw, mean, row, m_Class); for(int i=0; i < m_Dimension; i++) if(Utils.gr(m_Variance[row][i], 0.0)) neww[i] = oldw[i] + rate * delta[i]; result = newresult; newresult = target(neww, mean, row, m_Class); //System.out.println("???old: "+result+"|new: "+newresult); while(Utils.gr(newresult, result)){ // Search back //System.out.println("search back"); if(m_Rate == -1){ rate *= m_Decay; // Decay for(int i=0; i < m_Dimension; i++) if(Utils.gr(m_Variance[row][i], 0.0)) neww[i] = oldw[i] + rate * delta[i]; newresult = target(neww, mean, row, m_Class); } else{ for(int i=0; i < m_Dimension; i++) neww[i] = oldw[i]; break search; } } } //System.out.println("???Stop"); m_Change[row] = neww; }
Example 19
Source File: MultiClassClassifierUpdateable.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Returns the distribution for an instance. With a single classifier its
 * distribution is returned directly; for the 1-against-1 method the
 * pairwise classifiers either vote or feed pairwise coupling; every other
 * method is delegated to the superclass.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {
  if (m_Classifiers.length == 1) {
    return m_Classifiers[0].distributionForInstance(inst);
  }

  double[] probs = new double[inst.numClasses()];

  if (m_Method != METHOD_1_AGAINST_1) {
    return super.distributionForInstance(inst);
  }

  double[][] r = new double[inst.numClasses()][inst.numClasses()];
  double[][] n = new double[inst.numClasses()][inst.numClasses()];
  boolean coupling = m_pairwiseCoupling && inst.numClasses() > 2;

  for (int i = 0; i < m_ClassFilters.length; i++) {
    if (m_Classifiers[i] == null || !(m_SumOfWeights[i] > 0)) {
      continue;
    }

    Instance tempInst = (Instance) inst.copy();
    tempInst.setDataset(m_TwoClassDataset);
    double[] current = m_Classifiers[i].distributionForInstance(tempInst);

    // Recover the pair of class indices this classifier separates.
    Range range = new Range(
      ((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
    range.setUpper(m_ClassAttribute.numValues());
    int[] pair = range.getSelection();

    if (coupling) {
      r[pair[0]][pair[1]] = current[0];
      n[pair[0]][pair[1]] = m_SumOfWeights[i];
    } else {
      // One vote for the class the pairwise classifier prefers.
      int winner = (current[0] > current[1]) ? pair[0] : pair[1];
      probs[winner] += 1.0;
    }
  }

  if (coupling) {
    try {
      return pairwiseCoupling(n, r);
    } catch (IllegalArgumentException ignored) {
      // Deliberately ignored: fall through and return probs below.
    }
  }

  if (Utils.gr(Utils.sum(probs), 0)) {
    Utils.normalize(probs);
  }
  return probs;
}
Example 20
Source File: RuleStats.java From tsml with GNU General Public License v3.0 | 2 votes |
/**
 * Subset description length: <br>
 * S(t,k,p) = -k*log2(p)-(t-k)log2(1-p)
 *
 * The first term is taken as 0 when p is not greater than zero
 * (per Utils.gr).
 *
 * Details see Quilan: "MDL and categorical theories (Continued)",ML95
 *
 * @param t the number of elements in a known set
 * @param k the number of elements in a subset
 * @param p the expected proportion of subset known by recipient
 * @return the subset description length
 */
public static double subsetDL(double t, double k, double p) {
  double length = 0.0;
  if (Utils.gr(p, 0.0)) {
    length = -k * Utils.log2(p);
  }
  length -= (t - k) * Utils.log2(1 - p);
  return length;
}