Java Code Examples for weka.core.Instance#setWeight()
The following examples show how to use
weka.core.Instance#setWeight().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GlobalScoreSearchAlgorithm.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Scores a Bayes network by leave-one-out cross validation over the data set
 * attached to it ({@code bayesNet.m_Instances}).
 *
 * <p>For every instance the weight is negated and the instance is passed to
 * {@code updateClassifier}; the instance is then scored, its weight is negated
 * a second time and it is passed to {@code updateClassifier} again.
 *
 * @param bayesNet Bayes network containing the structure to evaluate
 * @return the accumulated accuracy increase divided by the accumulated weight
 *         (described by the original documentation as an accuracy in 0..1)
 * @throws Exception passed on by updateClassifier
 */
public double leaveOneOutCV(BayesNet bayesNet) throws Exception {
  m_BayesNet = bayesNet;

  double accuracySum = 0.0;
  double weightSum = 0.0;
  Instances data = bayesNet.m_Instances;
  bayesNet.estimateCPTs();

  int index = 0;
  while (index < data.numInstances()) {
    Instance heldOut = data.instance(index);

    // first negation + update
    heldOut.setWeight(-heldOut.weight());
    bayesNet.updateClassifier(heldOut);

    // both sums are taken between the two negations
    accuracySum += accuracyIncrease(heldOut);
    weightSum += heldOut.weight();

    // second negation + update
    heldOut.setWeight(-heldOut.weight());
    bayesNet.updateClassifier(heldOut);

    index++;
  }

  return accuracySum / weightSum;
}
Example 2
Source File: PartitionMembership.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Convert a single instance over. The converted instance is added to * the end of the output queue. * * @param instance the instance to convert * @throws Exception if something goes wrong */ protected void convertInstance(Instance instance) throws Exception { // Make copy and set weight to one Instance cp = (Instance)instance.copy(); cp.setWeight(1.0); // Set up values double [] instanceVals = new double[outputFormatPeek().numAttributes()]; double [] vals = m_partitionGenerator.getMembershipValues(cp); System.arraycopy(vals, 0, instanceVals, 0, vals.length); if (instance.classIndex() >= 0) { instanceVals[instanceVals.length - 1] = instance.classValue(); } push(new SparseInstance(instance.weight(), instanceVals)); }
Example 3
Source File: BaggingMLUpdateable.java From meka with GNU General Public License v3.0 | 6 votes |
@Override public void updateClassifier(Instance x) throws Exception { for(int i = 0; i < m_NumIterations; i++) { // Oza-Bag style int k = poisson(1.0, random); if (m_BagSizePercent == 100) { // Train on all instances k = 1; } if (k > 0) { // Train on this instance only if k > 0 Instance x_weighted = (Instance) x.copy(); x_weighted.setWeight(x.weight() * (double)k); ((UpdateableClassifier)m_Classifiers[i]).updateClassifier(x_weighted); } } }
Example 4
Source File: RotationForest.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Transforms an instance for the i-th classifier. * * @param instance the instance to be transformed * @param i the base classifier number * @return the transformed instance * @throws Exception if the instance can't be converted successfully */ protected Instance convertInstance( Instance instance, int i ) throws Exception { Instance newInstance = new DenseInstance( m_Headers[ i ].numAttributes( ) ); newInstance.setWeight(instance.weight()); newInstance.setDataset( m_Headers[ i ] ); int currentAttribute = 0; // Project the data for each group for( int j = 0; j < m_Groups[i].length; j++ ) { Instance auxInstance = new DenseInstance( m_Groups[i][j].length + 1 ); int k; for( k = 0; k < m_Groups[i][j].length; k++ ) { auxInstance.setValue( k, instance.value( m_Groups[i][j][k] ) ); } auxInstance.setValue( k, instance.classValue( ) ); auxInstance.setDataset( m_ReducedHeaders[ i ][ j ] ); m_ProjectionFilters[i][j].input( auxInstance ); auxInstance = m_ProjectionFilters[i][j].output( ); m_ProjectionFilters[i][j].batchFinished(); for( int a = 0; a < auxInstance.numAttributes() - 1; a++ ) { newInstance.setValue( currentAttribute++, auxInstance.value( a ) ); } } newInstance.setClassValue( instance.classValue() ); return newInstance; }
Example 5
Source File: PropositionalToMultiInstance.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * adds a new bag out of the given data and adds it to the output * * @param input the intput dataset * @param output the dataset this bag is added to * @param bagInsts the instances in this bag * @param bagIndex the bagIndex of this bag * @param classValue the associated class value * @param bagWeight the weight of the bag */ protected void addBag( Instances input, Instances output, Instances bagInsts, int bagIndex, double classValue, double bagWeight) { // copy strings/relational values for (int i = 0; i < bagInsts.numInstances(); i++) { RelationalLocator.copyRelationalValues( bagInsts.instance(i), false, input, m_InputRelAtts, bagInsts, m_BagRelAtts); StringLocator.copyStringValues( bagInsts.instance(i), false, input, m_InputStringAtts, bagInsts, m_BagStringAtts); } int value = output.attribute(1).addRelation(bagInsts); Instance newBag = new DenseInstance(output.numAttributes()); newBag.setValue(0, bagIndex); newBag.setValue(2, classValue); newBag.setValue(1, value); newBag.setWeight(bagWeight); newBag.setDataset(output); output.add(newBag); }
Example 6
Source File: ContractRotationForest.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Transforms an instance for the i-th classifier. * * @param instance the instance to be transformed * @param i the base classifier number * @return the transformed instance * @throws Exception if the instance can't be converted successfully */ protected Instance convertInstance( Instance instance, int i ) throws Exception { Instance newInstance = new DenseInstance( headers.get(i).numAttributes( ) ); newInstance.setWeight(instance.weight()); newInstance.setDataset(headers.get(i)); int currentAttribute = 0; // Project the data for each group int[][] g=groups.get(i); for( int j = 0; j < g.length; j++ ) { Instance auxInstance = new DenseInstance(g[j].length + 1 ); int k; for( k = 0; k < g[j].length; k++ ) { auxInstance.setValue( k, instance.value( g[j][k] ) ); } auxInstance.setValue( k, instance.classValue( ) ); auxInstance.setDataset(reducedHeaders.get(i)[ j ] ); Filter[] projection=projectionFilters.get(i); projection[j].input( auxInstance ); auxInstance = projection[j].output( ); projection[j].batchFinished(); for( int a = 0; a < auxInstance.numAttributes() - 1; a++ ) { newInstance.setValue( currentAttribute++, auxInstance.value( a ) ); } } newInstance.setClassValue( instance.classValue() ); return newInstance; }
Example 7
Source File: LPS.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Splits instances into subsets based on the given split. * * @param data the data to work with * @return the subsets of instances * @throws Exception if something goes wrong */ protected Instances[] splitData(Instances data) throws Exception { // Allocate array of Instances objects Instances[] subsets = new Instances[m_Prop.length]; for (int i = 0; i < m_Prop.length; i++) { subsets[i] = new Instances(data, data.numInstances()); } // Go through the data for (int i = 0; i < data.numInstances(); i++) { // Get instance Instance inst = data.instance(i); // Does the instance have a missing value? if (inst.isMissing(m_Attribute)) { // Split instance up for (int k = 0; k < m_Prop.length; k++) { if (m_Prop[k] > 0) { Instance copy = (Instance) inst.copy(); copy.setWeight(m_Prop[k] * inst.weight()); subsets[k].add(copy); } } // Proceed to next instance continue; } // Do we have a nominal attribute? if (data.attribute(m_Attribute).isNominal()) { subsets[(int) inst.value(m_Attribute)].add(inst); // Proceed to next instance continue; } // Do we have a numeric attribute? if (data.attribute(m_Attribute).isNumeric()) { subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst); // Proceed to next instance continue; } // Else throw an exception throw new IllegalArgumentException("Unknown attribute type"); } // Save memory for (int i = 0; i < m_Prop.length; i++) { subsets[i].compactify(); } // Return the subsets return subsets; }
Example 8
Source File: RacedIncrementalLogitBoost.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * performs a boosting iteration, returning a new model for the committee * * @param data the data to boost on * @return the new model * @throws Exception if anything goes wrong */ protected Classifier[] boost(Instances data) throws Exception { Classifier[] newModel = AbstractClassifier.makeCopies(m_Classifier, m_NumClasses); // Create a copy of the data with the class transformed into numeric Instances boostData = new Instances(data); boostData.deleteWithMissingClass(); int numInstances = boostData.numInstances(); // Temporarily unset the class index int classIndex = data.classIndex(); boostData.setClassIndex(-1); boostData.deleteAttributeAt(classIndex); boostData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex); boostData.setClassIndex(classIndex); double [][] trainFs = new double [numInstances][m_NumClasses]; double [][] trainYs = new double [numInstances][m_NumClasses]; for (int j = 0; j < m_NumClasses; j++) { for (int i = 0, k = 0; i < numInstances; i++, k++) { while (data.instance(k).classIsMissing()) k++; trainYs[i][j] = (data.instance(k).classValue() == j) ? 
1 : 0; } } // Evaluate / increment trainFs from the classifiers for (int x = 0; x < m_models.size(); x++) { for (int i = 0; i < numInstances; i++) { double [] pred = new double [m_NumClasses]; double predSum = 0; Classifier[] model = (Classifier[]) m_models.elementAt(x); for (int j = 0; j < m_NumClasses; j++) { pred[j] = model[j].classifyInstance(boostData.instance(i)); predSum += pred[j]; } predSum /= m_NumClasses; for (int j = 0; j < m_NumClasses; j++) { trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses-1) / m_NumClasses; } } } for (int j = 0; j < m_NumClasses; j++) { // Set instance pseudoclass and weights for (int i = 0; i < numInstances; i++) { double p = RtoP(trainFs[i], j); Instance current = boostData.instance(i); double z, actual = trainYs[i][j]; if (actual == 1) { z = 1.0 / p; if (z > Z_MAX) { // threshold z = Z_MAX; } } else if (actual == 0) { z = -1.0 / (1.0 - p); if (z < -Z_MAX) { // threshold z = -Z_MAX; } } else { z = (actual - p) / (p * (1 - p)); } double w = (actual - p) / z; current.setValue(classIndex, z); current.setWeight(numInstances * w); } Instances trainData = boostData; if (m_UseResampling) { double[] weights = new double[boostData.numInstances()]; for (int kk = 0; kk < weights.length; kk++) { weights[kk] = boostData.instance(kk).weight(); } trainData = boostData.resampleWithWeights(m_RandomInstance, weights); } // Build the classifier newModel[j].buildClassifier(trainData); } return newModel; }
Example 9
Source File: RandomTree.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Splits instances into subsets based on the given split. * * @param data the data to work with * @return the subsets of instances * @throws Exception if something goes wrong */ protected Instances[] splitData(Instances data) throws Exception { // Allocate array of Instances objects Instances[] subsets = new Instances[m_Prop.length]; for (int i = 0; i < m_Prop.length; i++) { subsets[i] = new Instances(data, data.numInstances()); } // Go through the data for (int i = 0; i < data.numInstances(); i++) { // Get instance Instance inst = data.instance(i); // Does the instance have a missing value? if (inst.isMissing(m_Attribute)) { // Split instance up for (int k = 0; k < m_Prop.length; k++) { if (m_Prop[k] > 0) { Instance copy = (Instance) inst.copy(); copy.setWeight(m_Prop[k] * inst.weight()); subsets[k].add(copy); } } // Proceed to next instance continue; } // Do we have a nominal attribute? if (data.attribute(m_Attribute).isNominal()) { subsets[(int) inst.value(m_Attribute)].add(inst); // Proceed to next instance continue; } // Do we have a numeric attribute? if (data.attribute(m_Attribute).isNumeric()) { subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst); // Proceed to next instance continue; } // Else throw an exception throw new IllegalArgumentException("Unknown attribute type"); } // Save memory for (int i = 0; i < m_Prop.length; i++) { subsets[i].compactify(); } // Return the subsets return subsets; }
Example 10
Source File: NBTreeSplit.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Creates split on enumerated attribute. * * @exception Exception if something goes wrong */ private void handleEnumeratedAttribute(Instances trainInstances) throws Exception { m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights, true); m_c45S.buildClassifier(trainInstances); if (m_c45S.numSubsets() == 0) { return; } m_errors = 0; Instance instance; Instances [] trainingSets = new Instances [m_complexityIndex]; for (int i = 0; i < m_complexityIndex; i++) { trainingSets[i] = new Instances(trainInstances, 0); } /* m_distribution = new Distribution(m_complexityIndex, trainInstances.numClasses()); */ int subset; for (int i = 0; i < trainInstances.numInstances(); i++) { instance = trainInstances.instance(i); subset = m_c45S.whichSubset(instance); if (subset > -1) { trainingSets[subset].add((Instance)instance.copy()); } else { double [] weights = m_c45S.weights(instance); for (int j = 0; j < m_complexityIndex; j++) { try { Instance temp = (Instance) instance.copy(); if (weights.length == m_complexityIndex) { temp.setWeight(temp.weight() * weights[j]); } else { temp.setWeight(temp.weight() / m_complexityIndex); } trainingSets[j].add(temp); } catch (Exception ex) { ex.printStackTrace(); System.err.println("*** "+m_complexityIndex); System.err.println(weights.length); System.exit(1); } } } } /* // compute weights (weights of instances per subset m_weights = new double [m_complexityIndex]; for (int i = 0; i < m_complexityIndex; i++) { m_weights[i] = trainingSets[i].sumOfWeights(); } Utils.normalize(m_weights); */ /* // Only Instances with known values are relevant. 
Enumeration enu = trainInstances.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (!instance.isMissing(m_attIndex)) { // m_distribution.add((int)instance.value(m_attIndex),instance); trainingSets[(int)instances.value(m_attIndex)].add(instance); } else { // add these to the error count m_errors += instance.weight(); } } */ Random r = new Random(1); int minNumCount = 0; for (int i = 0; i < m_complexityIndex; i++) { if (trainingSets[i].numInstances() >= 5) { minNumCount++; // Discretize the sets Discretize disc = new Discretize(); disc.setInputFormat(trainingSets[i]); trainingSets[i] = Filter.useFilter(trainingSets[i], disc); trainingSets[i].randomize(r); trainingSets[i].stratify(5); NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable(); fullModel.buildClassifier(trainingSets[i]); // add the errors for this branch of the split m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r); } else { // if fewer than min obj then just count them as errors for (int j = 0; j < trainingSets[i].numInstances(); j++) { m_errors += trainingSets[i].instance(j).weight(); } } } // Check if there are at least five instances in at least two of the subsets // subsets. if (minNumCount > 1) { m_numSubsets = m_complexityIndex; } }
Example 11
Source File: NBTreeSplit.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Creates split on numeric attribute. * * @exception Exception if something goes wrong */ private void handleNumericAttribute(Instances trainInstances) throws Exception { m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights, true); m_c45S.buildClassifier(trainInstances); if (m_c45S.numSubsets() == 0) { return; } m_errors = 0; Instances [] trainingSets = new Instances [m_complexityIndex]; trainingSets[0] = new Instances(trainInstances, 0); trainingSets[1] = new Instances(trainInstances, 0); int subset = -1; // populate the subsets for (int i = 0; i < trainInstances.numInstances(); i++) { Instance instance = trainInstances.instance(i); subset = m_c45S.whichSubset(instance); if (subset != -1) { trainingSets[subset].add((Instance)instance.copy()); } else { double [] weights = m_c45S.weights(instance); for (int j = 0; j < m_complexityIndex; j++) { Instance temp = (Instance)instance.copy(); if (weights.length == m_complexityIndex) { temp.setWeight(temp.weight() * weights[j]); } else { temp.setWeight(temp.weight() / m_complexityIndex); } trainingSets[j].add(temp); } } } /* // compute weights (weights of instances per subset m_weights = new double [m_complexityIndex]; for (int i = 0; i < m_complexityIndex; i++) { m_weights[i] = trainingSets[i].sumOfWeights(); } Utils.normalize(m_weights); */ Random r = new Random(1); int minNumCount = 0; for (int i = 0; i < m_complexityIndex; i++) { if (trainingSets[i].numInstances() > 5) { minNumCount++; // Discretize the sets Discretize disc = new Discretize(); disc.setInputFormat(trainingSets[i]); trainingSets[i] = Filter.useFilter(trainingSets[i], disc); trainingSets[i].randomize(r); trainingSets[i].stratify(5); NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable(); fullModel.buildClassifier(trainingSets[i]); // add the errors for this branch of the split m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r); } else { for (int j = 0; j < trainingSets[i].numInstances(); j++) { m_errors += 
trainingSets[i].instance(j).weight(); } } } // Check if minimum number of Instances in at least two // subsets. if (minNumCount > 1) { m_numSubsets = m_complexityIndex; } }
Example 12
Source File: DTNB.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Classifies an instance for internal leave one out cross validation * of feature sets * * @param instance instance to be "left out" and classified * @param instA feature values of the selected features for the instance * @return the classification of the instance * @throws Exception if something goes wrong */ double evaluateInstanceLeaveOneOut(Instance instance, double [] instA) throws Exception { DecisionTableHashKey thekey; double [] tempDist; double [] normDist; thekey = new DecisionTableHashKey(instA); // if this one is not in the table if ((tempDist = (double [])m_entries.get(thekey)) == null) { throw new Error("This should never happen!"); } else { normDist = new double [tempDist.length]; System.arraycopy(tempDist,0,normDist,0,tempDist.length); normDist[(int)instance.classValue()] -= instance.weight(); // update the table // first check to see if the class counts are all zero now boolean ok = false; for (int i=0;i<normDist.length;i++) { if (Utils.gr(normDist[i],1.0)) { ok = true; break; } } // downdate the class prior counts m_classPriorCounts[(int)instance.classValue()] -= instance.weight(); double [] classPriors = m_classPriorCounts.clone(); Utils.normalize(classPriors); if (!ok) { // majority class normDist = classPriors; } else { Utils.normalize(normDist); } m_classPriorCounts[(int)instance.classValue()] += instance.weight(); if (m_NB != null){ // downdate NaiveBayes instance.setWeight(-instance.weight()); m_NB.updateClassifier(instance); double [] nbDist = m_NB.distributionForInstance(instance); instance.setWeight(-instance.weight()); m_NB.updateClassifier(instance); for (int i = 0; i < normDist.length; i++) { normDist[i] = (Math.log(normDist[i]) - Math.log(classPriors[i])); normDist[i] += Math.log(nbDist[i]); } normDist = Utils.logs2probs(normDist); // Utils.normalize(normDist); } if (m_evaluationMeasure == EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance); } else { m_evaluation.evaluateModelOnce(normDist, instance); 
} return Utils.maxIndex(normDist); } }
Example 13
Source File: PropositionalToMultiInstance.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * If the filter requires all instances prior to filtering, * output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } Instances input = getInputFormat(); input.sort(0); // make sure that bagID is sorted Instances output = getOutputFormat(); Instances bagInsts = output.attribute(1).relation(); Instance inst = new DenseInstance(bagInsts.numAttributes()); inst.setDataset(bagInsts); double bagIndex = input.instance(0).value(0); double classValue = input.instance(0).classValue(); double bagWeight = 0.0; // Convert pending input instances for(int i = 0; i < input.numInstances(); i++) { double currentBagIndex = input.instance(i).value(0); // copy the propositional instance value, except the bagIndex and the class value for (int j = 0; j < input.numAttributes() - 2; j++) inst.setValue(j, input.instance(i).value(j + 1)); inst.setWeight(input.instance(i).weight()); if (currentBagIndex == bagIndex){ bagInsts.add(inst); bagWeight += inst.weight(); } else{ addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight); bagInsts = bagInsts.stringFreeStructure(); bagInsts.add(inst); bagIndex = currentBagIndex; classValue = input.instance(i).classValue(); bagWeight = inst.weight(); } } // reach the last instance, create and add the last bag addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight); if (getRandomize()) output.randomize(new Random(getSeed())); for (int i = 0; i < output.numInstances(); i++) push(output.instance(i)); // Free memory flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
Example 14
Source File: NSR.java From meka with GNU General Public License v3.0 | 4 votes |
public Instances convertInstances(Instances D, int L) throws Exception { //Gather combinations HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D); if(getDebug()) System.out.println("Found "+distinctCombinations.size()+" unique combinations"); //Prune combinations MLUtils.pruneCountHashMap(distinctCombinations,m_P); if(getDebug()) System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P); // Remove all class attributes Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L)); // Add a new class attribute D_.insertAttributeAt(new Attribute("CLASS", new ArrayList(distinctCombinations.keySet())),0); // create the class attribute D_.setClassIndex(0); //Add class values for (int i = 0; i < D.numInstances(); i++) { String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L)); // add it if(distinctCombinations.containsKey(y)) //if its class value exists D_.instance(i).setClassValue(y); // decomp else if(m_N > 0) { String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N); for (String s : d_subsets) { int w = distinctCombinations.get(s); Instance copy = (Instance)(D_.instance(i)).copy(); copy.setClassValue(s); copy.setWeight(1.0 / d_subsets.length); D_.add(copy); } } } // remove with missing class D_.deleteWithMissingClass(); // keep the header of new dataset for classification m_InstancesTemplate = new Instances(D_, 0); if (getDebug()) System.out.println(""+D_); return D_; }