Java Code Examples for weka.core.Instance#weight()
The following examples show how to use
weka.core.Instance#weight().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EM.java From tsml with GNU General Public License v3.0 | 6 votes |
@Override public double[] call() { double[] llk = new double[2]; double loglk = 0.0, sOW = 0.0; try { for (int i = m_lowNum; i < m_highNum; i++) { Instance in = m_eData.instance(i); loglk += in.weight() * EM.this.logDensityForInstance(in); sOW += in.weight(); if (m_changeWeights) { m_weights[i] = distributionForInstance(in); } } // completedETask(loglk, sOW, true); } catch (Exception ex) { // completedETask(0, 0, false); } llk[0] = loglk; llk[1] = sOW; return llk; }
Example 2
Source File: GlobalScoreSearchAlgorithm.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the leave-one-out cross-validation accuracy of the given Bayes
 * network on the instances stored in {@code bayesNet.m_Instances}.
 *
 * <p>For every instance the weight is negated and the classifier updated
 * with it, the accuracy contribution and the (still negated) weight are
 * accumulated, and then the weight is negated back and the classifier
 * updated once more.
 *
 * @param bayesNet Bayes network containing the structure to evaluate
 * @return accumulated accuracy divided by accumulated weight
 * @throws Exception passed on by updateClassifier
 */
public double leaveOneOutCV(BayesNet bayesNet) throws Exception {
  m_BayesNet = bayesNet;

  final Instances trainingSet = bayesNet.m_Instances;
  bayesNet.estimateCPTs();

  double accuracySum = 0.0;
  double weightSum = 0.0;
  int row = 0;
  while (row < trainingSet.numInstances()) {
    final Instance heldOut = trainingSet.instance(row);

    // Flip the sign of the weight and update the network with the instance.
    heldOut.setWeight(-heldOut.weight());
    bayesNet.updateClassifier(heldOut);

    accuracySum += accuracyIncrease(heldOut);
    weightSum += heldOut.weight();

    // Flip the sign back and update the network again.
    heldOut.setWeight(-heldOut.weight());
    bayesNet.updateClassifier(heldOut);

    row++;
  }

  return accuracySum / weightSum;
}
Example 3
Source File: LPS.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Backfits the given data into the tree. */ public void backfitData(Instances data) throws Exception { double totalWeight = 0; double totalSumSquared = 0; // Compute initial class counts double[] classProbs = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = RandomRegressionTree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Fit data into tree backfitData(data, classProbs, totalWeight); }
Example 4
Source File: ConjunctiveRule.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Sums the weights of all instances whose class value, truncated to an
 * int, equals the given predicted class.
 *
 * @param data the data in question
 * @param clas the predicted class
 * @return the accumulated weight of the matching instances
 */
private double computeAccu(Instances data, int clas) {
  double matchedWeight = 0;
  final int rows = data.numInstances();
  for (int row = 0; row < rows; row++) {
    final Instance current = data.instance(row);
    if ((int) current.classValue() != clas) {
      continue;
    }
    matchedWeight += current.weight();
  }
  return matchedWeight;
}
Example 5
Source File: Evaluation.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Updates all the statistics about a predictors performance for the current * test instance. * * @param predictedValue the numeric value the classifier predicts * @param instance the instance to be classified * @throws Exception if the class of the instance is not set */ protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception { if (!instance.classIsMissing()) { // Update stats m_WithClass += instance.weight(); if (Utils.isMissingValue(predictedValue)) { m_Unclassified += instance.weight(); return; } m_SumClass += instance.weight() * instance.classValue(); m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue(); m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue; m_SumPredicted += instance.weight() * predictedValue; m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue; updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()), instance.weight()); } else m_MissingClass += instance.weight(); if (m_pluginMetrics != null) { for (AbstractEvaluationMetric m : m_pluginMetrics) { if (m instanceof StandardEvaluationMetric) { ((StandardEvaluationMetric) m).updateStatsForPredictor( predictedValue, instance); } else if (m instanceof InformationTheoreticEvaluationMetric) { ((InformationTheoreticEvaluationMetric) m).updateStatsForPredictor( predictedValue, instance); } } } }
Example 6
Source File: Distribution.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Adds the given instance to the given bag by increasing the per-class
 * per-bag, per-bag, per-class and overall totals by the instance's weight.
 *
 * @param bagIndex index of the bag to add to
 * @param instance the instance to add
 * @exception Exception if something goes wrong
 */
public final void add(int bagIndex, Instance instance) throws Exception {
  final int cls = (int) instance.classValue();
  final double w = instance.weight();

  m_perClassPerBag[bagIndex][cls] += w;
  m_perBag[bagIndex] += w;
  m_perClass[cls] += w;
  totaL += w;
}
Example 7
Source File: Distribution.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Shifts the given instance from one bag to another: its weight is
 * subtracted from the source bag's counts and added to the target bag's
 * counts.
 *
 * @param from index of the bag the instance leaves
 * @param to index of the bag the instance enters
 * @param instance the instance being moved
 * @exception Exception if something goes wrong
 */
public final void shift(int from, int to, Instance instance) throws Exception {
  final int cls = (int) instance.classValue();
  final double w = instance.weight();

  // Per-class counts of the two bags.
  m_perClassPerBag[from][cls] = m_perClassPerBag[from][cls] - w;
  m_perClassPerBag[to][cls] = m_perClassPerBag[to][cls] + w;

  // Overall bag totals.
  m_perBag[from] = m_perBag[from] - w;
  m_perBag[to] = m_perBag[to] + w;
}
Example 8
Source File: MultiClassClassifierUpdateable.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Updates the classifier with the given instance. Instances with a missing
 * class are ignored.
 *
 * <p>With exactly one base classifier the instance is handed to it
 * directly. Otherwise the instance is passed through each class filter
 * whose classifier is non-null, and every non-null filter output is used to
 * update the corresponding classifier.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the
 *              model.
 */
@Override
public void updateClassifier(Instance instance) throws Exception {
  if (instance.classIsMissing()) {
    return;
  }

  if (m_Classifiers.length == 1) {
    ((UpdateableClassifier) m_Classifiers[0]).updateClassifier(instance);
    return;
  }

  for (int idx = 0; idx < m_Classifiers.length; idx++) {
    if (m_Classifiers[idx] == null) {
      continue;
    }

    m_ClassFilters[idx].input(instance);
    final Instance filtered = m_ClassFilters[idx].output();
    if (filtered == null) {
      continue;
    }

    filtered.dataset().setClassIndex(m_ClassAttribute.index());
    ((UpdateableClassifier) m_Classifiers[idx]).updateClassifier(filtered);
    if (m_Method == METHOD_1_AGAINST_1) {
      m_SumOfWeights[idx] += filtered.weight();
    }
  }
}
Example 9
Source File: NominalToBinary.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes, for each nominal attribute, the weighted average class value of
 * every attribute value and stores the sort order of those averages in
 * {@code m_Indices}.
 *
 * <p>Instances with a missing class or a missing value for the attribute
 * are skipped. A value with no positive weight gets the overall ratio
 * (sum of weighted class values over total weight) instead.
 */
private void computeAverageClassValues() {
  final int numAtts = getInputFormat().numAttributes();
  m_Indices = new int[numAtts][0];

  for (int attIdx = 0; attIdx < numAtts; attIdx++) {
    final Attribute att = getInputFormat().attribute(attIdx);
    if (!att.isNominal()) {
      continue;
    }

    final int numVals = att.numValues();
    final double[] classAverages = new double[numVals];
    final double[] valueWeights = new double[numVals];

    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      final Instance current = getInputFormat().instance(row);
      if (current.classIsMissing() || current.isMissing(attIdx)) {
        continue;
      }
      valueWeights[(int) current.value(attIdx)] += current.weight();
      classAverages[(int) current.value(attIdx)] += current.weight() * current.classValue();
    }

    final double weightedClassSum = Utils.sum(classAverages);
    final double weightSum = Utils.sum(valueWeights);
    if (Utils.gr(weightSum, 0)) {
      for (int v = 0; v < numVals; v++) {
        if (Utils.gr(valueWeights[v], 0)) {
          classAverages[v] /= (double) valueWeights[v];
        } else {
          classAverages[v] = weightedClassSum / (double) weightSum;
        }
      }
    }

    m_Indices[attIdx] = Utils.sort(classAverages);
  }
}
Example 10
Source File: MajorityLabelset.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * Adds the weight of instance {@code x} to the frequency of its labelset
 * and updates the majority prediction if that labelset now has the largest
 * accumulated weight.
 *
 * <p>The comparison against {@code maxValue} is made for every instance,
 * including the first occurrence of a labelset. Previously a labelset seen
 * only once was stored but never considered, so it could not become the
 * prediction even when its weight exceeded every other labelset's total.
 *
 * @param x the instance whose labelset is counted
 * @param L the number of labels passed to the bit-string conversion
 */
protected void updateCount(Instance x, int L) {
  String y = MLUtils.toBitString(x, L);

  // Accumulated weight for this labelset, starting from zero if unseen.
  Double previous = classFreqs.get(y);
  double freq = (previous == null) ? x.weight() : previous + x.weight();
  classFreqs.put(y, freq);

  // The strict comparison keeps the earlier labelset on ties.
  if (maxValue < freq) {
    maxValue = freq;
    this.prediction = MLUtils.fromBitString(y);
  }
}
Example 11
Source File: Ridor.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Sums the weights of all instances whose class value equals
 * {@code m_Class} according to {@code Utils.eq}.
 *
 * @param data the data in question
 * @return the accumulated weight of the instances belonging to m_Class
 */
private double computeDefAccu(Instances data) {
  double matchedWeight = 0;
  int row = 0;
  while (row < data.numInstances()) {
    final Instance current = data.instance(row++);
    if (Utils.eq(current.classValue(), m_Class)) {
      matchedWeight += current.weight();
    }
  }
  return matchedWeight;
}
Example 12
Source File: RuleStats.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Find all the instances in the dataset covered/not covered by * the rule in given index, and the correponding simple statistics * and predicted class distributions are stored in the given double array, * which can be obtained by getSimpleStats() and getDistributions().<br> * * @param index the given index, assuming correct * @param insts the dataset to be covered by the rule * @param stats the given double array to hold stats, side-effected * @param dist the given array to hold class distributions, side-effected * if null, the distribution is not necessary * @return the instances covered and not covered by the rule */ private Instances[] computeSimpleStats(int index, Instances insts, double[] stats, double[] dist){ Rule rule = (Rule)m_Ruleset.elementAt(index); Instances[] data = new Instances[2]; data[0] = new Instances(insts, insts.numInstances()); data[1] = new Instances(insts, insts.numInstances()); for(int i=0; i<insts.numInstances(); i++){ Instance datum = insts.instance(i); double weight = datum.weight(); if(rule.covers(datum)){ data[0].add(datum); // Covered by this rule stats[0] += weight; // Coverage if((int)datum.classValue() == (int)rule.getConsequent()) stats[2] += weight; // True positives else stats[4] += weight; // False positives if(dist != null) dist[(int)datum.classValue()] += weight; } else{ data[1].add(datum); // Not covered by this rule stats[1] += weight; if((int)datum.classValue() != (int)rule.getConsequent()) stats[3] += weight; // True negatives else stats[5] += weight; // False negatives } } return data; }
Example 13
Source File: RandomTree.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Backfits the given data into the tree. */ public void backfitData(Instances data) throws Exception { // Compute initial class counts double[] classProbs = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); classProbs[(int) inst.classValue()] += inst.weight(); } // Fit data into tree backfitData(data, classProbs); }
Example 14
Source File: ClassifierSplitModel.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Splits the given set of instances into {@code m_numSubsets} subsets.
 *
 * <p>An instance for which {@code whichSubset} returns a value greater than
 * -1 goes to that subset unchanged. Otherwise it is added to every subset
 * whose entry in {@code weights(instance)} is greater than zero, with its
 * weight multiplied by that entry.
 *
 * @param data the instances to split
 * @return the subsets, each compactified
 * @exception Exception if something goes wrong
 */
public final Instances[] split(Instances data) throws Exception {
  final Instances[] subsets = new Instances[m_numSubsets];
  for (int s = 0; s < m_numSubsets; s++) {
    subsets[s] = new Instances(data, data.numInstances());
  }

  for (int row = 0; row < data.numInstances(); row++) {
    final Instance current = data.instance(row);
    final double[] fractions = weights(current);
    final int target = whichSubset(current);

    if (target > -1) {
      subsets[target].add(current);
      continue;
    }

    // No single subset: distribute the instance fractionally.
    for (int s = 0; s < m_numSubsets; s++) {
      if (!Utils.gr(fractions[s], 0)) {
        continue;
      }
      final double scaledWeight = fractions[s] * current.weight();
      subsets[s].add(current);
      subsets[s].lastInstance().setWeight(scaledWeight);
    }
  }

  for (int s = 0; s < m_numSubsets; s++) {
    subsets[s].compactify();
  }
  return subsets;
}
Example 15
Source File: SimpleKMeansWithSilhouette.java From apogen with Apache License 2.0 | 5 votes |
/** * clusters an instance that has been through the filters. * * @param instance * the instance to assign a cluster to * @param updateErrors * if true, update the within clusters sum of errors * @param useFastDistCalc * whether to use the fast distance calculation or not * @param instanceCanopies * the canopies covering the instance to be clustered, or null if not * using the option to reduce the number of distance computations via * canopies * @return a cluster number */ private int clusterProcessedInstance(Instance instance, boolean updateErrors, boolean useFastDistCalc, long[] instanceCanopies) { double minDist = Integer.MAX_VALUE; int bestCluster = 0; for (int i = 0; i < m_NumClusters; i++) { double dist; if (useFastDistCalc) { if (m_speedUpDistanceCompWithCanopies && instanceCanopies != null && instanceCanopies.length > 0) { try { if (!Canopy.nonEmptyCanopySetIntersection(m_centroidCanopyAssignments.get(i), instanceCanopies)) { continue; } } catch (Exception ex) { ex.printStackTrace(); } dist = m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(i), minDist); } else { dist = m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(i), minDist); } } else { dist = m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(i)); } if (dist < minDist) { minDist = dist; bestCluster = i; } } if (updateErrors) { if (m_DistanceFunction instanceof EuclideanDistance) { // Euclidean distance to Squared Euclidean distance minDist *= minDist * instance.weight(); } m_squaredErrors[bestCluster] += minDist; } return bestCluster; }
Example 16
Source File: CollectiveTree.java From collective-classification-weka-package with GNU General Public License v3.0 | 4 votes |
/** * determines the distribution of the instances with a non-missing value * at the given attribute position. * @param data the instances to work on * @param indices the sorted indices * @param att the attribute to determine the distribution for * @return the distribution */ protected double[] determineAttributeDistribution( Instances data, int[] indices, int att) { double[] result; int i; Instance inst; int count; double[] values; double median; // nominal attribute if (data.attribute(att).isNominal()) { result = new double[data.attribute(att).numValues()]; // determine attribute distribution (necessary to distribute instances // with no class and missing attribute) for (i = 0; i < indices.length; i++) { inst = data.instance(indices[i]); if (inst.isMissing(att)) break; result[(int) inst.value(att)] += inst.weight(); } } // numeric attribute else { result = new double[2]; // less or greater/equal than median // determine number of instances w/o missing attribute count = 0; for (i = 0; i < indices.length; i++) { inst = data.instance(indices[i]); if (inst.isMissing(att)) break; count++; } // determine median values = new double[count]; for (i = 0; i < count; i++) { inst = data.instance(indices[i]); values[i] = inst.value(att); } if (values.length == 0) median = 0; else if (values.length == 1) median = values[0]; else median = Utils.kthSmallestValue(values, values.length / 2); // disitribute for (i = 0; i < count; i++) { inst = data.instance(indices[i]); if (Utils.sm(inst.value(att), median)) result[0] += inst.weight(); else result[1] += inst.weight(); } } if (Utils.gr(Utils.sum(result), 0)) Utils.normalize(result); return result; }
Example 17
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Transforms an instance in original (unnormalized) format into the
 * principal-component space, converting back to the original space if
 * {@code m_transBackToOriginal} is set.
 *
 * <p>A copy of the instance is passed through the replace-missing,
 * nominal-to-binary, optional attribute, and standardize or center filters
 * before projection. Components are added in the order given by
 * {@code m_sortedEigens}, from last index to first, until the cumulative
 * share of eigenvalues reaches {@code m_coverVariance}.
 *
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance, sparse if the input was a SparseInstance,
 *         dense otherwise, carrying the input's weight
 * @throws Exception if instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
  if (m_eigenvalues == null) {
    throw new Exception("convertInstance: Principal components not "
        + "built yet");
  }

  final double[] projected = new double[m_outputNumAtts];
  Instance work = (Instance) instance.copy();

  if (!instance.dataset().equalHeaders(m_trainHeader)) {
    throw new Exception("Can't convert instance: header's don't match: "
        + "PrincipalComponents\n"
        + instance.dataset().equalHeadersMsg(m_trainHeader));
  }

  // Replace missing values.
  m_replaceMissingFilter.input(work);
  m_replaceMissingFilter.batchFinished();
  work = m_replaceMissingFilter.output();

  // Binarize nominal attributes.
  m_nominalToBinFilter.input(work);
  m_nominalToBinFilter.batchFinished();
  work = m_nominalToBinFilter.output();

  // Optional attribute filter.
  if (m_attributeFilter != null) {
    m_attributeFilter.input(work);
    m_attributeFilter.batchFinished();
    work = m_attributeFilter.output();
  }

  // Either center or standardize.
  if (m_center) {
    m_centerFilter.input(work);
    m_centerFilter.batchFinished();
    work = m_centerFilter.output();
  } else {
    m_standardizeFilter.input(work);
    m_standardizeFilter.batchFinished();
    work = m_standardizeFilter.output();
  }

  // The class value, if any, is copied unchanged into the last slot.
  if (m_hasClass) {
    projected[m_outputNumAtts - 1] = instance.value(instance.classIndex());
  }

  double coveredVariance = 0;
  for (int comp = m_numAttribs - 1; comp >= 0; comp--) {
    final int eigenIdx = m_sortedEigens[comp];
    double dot = 0.0;
    for (int a = 0; a < m_numAttribs; a++) {
      dot += (m_eigenvectors[a][eigenIdx] * work.value(a));
    }
    projected[m_numAttribs - comp - 1] = dot;

    coveredVariance += m_eigenvalues[eigenIdx];
    if ((coveredVariance / m_sumOfEigenValues) >= m_coverVariance) {
      break;
    }
  }

  final Instance result;
  if (instance instanceof SparseInstance) {
    result = new SparseInstance(instance.weight(), projected);
  } else {
    result = new DenseInstance(instance.weight(), projected);
  }

  if (m_transBackToOriginal) {
    return convertInstanceToOriginal(result);
  }
  return result;
}
Example 18
Source File: IBk.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Turn the list of nearest neighbors into a probability distribution. * * @param neighbours the list of nearest neighboring instances * @param distances the distances of the neighbors * @return the probability distribution * @throws Exception if computation goes wrong or has no class attribute */ protected double [] makeDistribution(Instances neighbours, double[] distances) throws Exception { double total = 0, weight; double [] distribution = new double [m_NumClasses]; // Set up a correction to the estimator if (m_ClassType == Attribute.NOMINAL) { for(int i = 0; i < m_NumClasses; i++) { distribution[i] = 1.0 / Math.max(1,m_Train.numInstances()); } total = (double)m_NumClasses / Math.max(1,m_Train.numInstances()); } for(int i=0; i < neighbours.numInstances(); i++) { // Collect class counts Instance current = neighbours.instance(i); distances[i] = distances[i]*distances[i]; distances[i] = Math.sqrt(distances[i]/m_NumAttributesUsed); switch (m_DistanceWeighting) { case WEIGHT_INVERSE: weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero break; case WEIGHT_SIMILARITY: weight = 1.0 - distances[i]; break; default: // WEIGHT_NONE: weight = 1.0; break; } weight *= current.weight(); try { switch (m_ClassType) { case Attribute.NOMINAL: distribution[(int)current.classValue()] += weight; break; case Attribute.NUMERIC: distribution[0] += current.classValue() * weight; break; } } catch (Exception ex) { throw new Error("Data has no class attribute!"); } total += weight; } // Normalise distribution if (total > 0) { Utils.normalize(distribution, total); } return distribution; }
Example 19
Source File: ReplaceMissingValues.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * If the filter requires all instances prior to filtering, * output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_ModesAndMeans == null) { // Compute modes and means double sumOfWeights = getInputFormat().sumOfWeights(); double[][] counts = new double[getInputFormat().numAttributes()][]; for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).isNominal()) { counts[i] = new double[getInputFormat().attribute(i).numValues()]; if (counts[i].length > 0) counts[i][0] = sumOfWeights; } } double[] sums = new double[getInputFormat().numAttributes()]; for (int i = 0; i < sums.length; i++) { sums[i] = sumOfWeights; } double[] results = new double[getInputFormat().numAttributes()]; for (int j = 0; j < getInputFormat().numInstances(); j++) { Instance inst = getInputFormat().instance(j); for (int i = 0; i < inst.numValues(); i++) { if (!inst.isMissingSparse(i)) { double value = inst.valueSparse(i); if (inst.attributeSparse(i).isNominal()) { if (counts[inst.index(i)].length > 0) { counts[inst.index(i)][(int)value] += inst.weight(); counts[inst.index(i)][0] -= inst.weight(); } } else if (inst.attributeSparse(i).isNumeric()) { results[inst.index(i)] += inst.weight() * inst.valueSparse(i); } } else { if (inst.attributeSparse(i).isNominal()) { if (counts[inst.index(i)].length > 0) { counts[inst.index(i)][0] -= inst.weight(); } } else if (inst.attributeSparse(i).isNumeric()) { sums[inst.index(i)] -= inst.weight(); } } } } m_ModesAndMeans = new double[getInputFormat().numAttributes()]; for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).isNominal()) { if 
(counts[i].length == 0) m_ModesAndMeans[i] = Utils.missingValue(); else m_ModesAndMeans[i] = (double)Utils.maxIndex(counts[i]); } else if (getInputFormat().attribute(i).isNumeric()) { if (Utils.gr(sums[i], 0)) { m_ModesAndMeans[i] = results[i] / sums[i]; } } } // Convert pending input instances for(int i = 0; i < getInputFormat().numInstances(); i++) { convertInstance(getInputFormat().instance(i)); } } // Free memory flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
Example 20
Source File: LPS.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Computes array that indicates node membership. Array locations are * allocated based on breadth-first exploration of the tree. */ @Override public double[] getMembershipValues(Instance instance) throws Exception { if (m_zeroR != null) { double[] m = new double[1]; m[0] = instance.weight(); return m; } else { // Set up array for membership values double[] a = new double[numElements()]; // Initialize queues Queue<Double> queueOfWeights = new LinkedList<Double>(); Queue<Tree> queueOfNodes = new LinkedList<Tree>(); queueOfWeights.add(instance.weight()); queueOfNodes.add(m_Tree); int index = 0; // While the queue is not empty while (!queueOfNodes.isEmpty()) { a[index++] = queueOfWeights.poll(); Tree node = queueOfNodes.poll(); // Is node a leaf? if (node.m_Attribute <= -1) { continue; } // Compute weight distribution double[] weights = new double[node.m_Successors.length]; if (instance.isMissing(node.m_Attribute)) { System.arraycopy(node.m_Prop, 0, weights, 0, node.m_Prop.length); } else if (m_Info.attribute(node.m_Attribute).isNominal()) { weights[(int) instance.value(node.m_Attribute)] = 1.0; } else { if (instance.value(node.m_Attribute) < node.m_SplitPoint) { weights[0] = 1.0; } else { weights[1] = 1.0; } } for (int i = 0; i < node.m_Successors.length; i++) { queueOfNodes.add(node.m_Successors[i]); queueOfWeights.add(a[index - 1] * weights[i]); } } return a; } }