Java Code Examples for weka.core.Instances#deleteWithMissingClass()
The following examples show how to use weka.core.Instances#deleteWithMissingClass().
The source file, originating project, and license are noted above each example.
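All of the snippets below follow the same basic pattern: copy the training data, call deleteWithMissingClass() on the copy to drop any instances whose class value is missing, and then fit a model on the cleaned copy. Here is a minimal, self-contained sketch of that pattern; the J48 classifier, the training.arff path, and the choice of the last attribute as the class are placeholder assumptions for illustration, not part of the examples that follow.

import java.io.FileReader;

import weka.classifiers.trees.J48;
import weka.core.Instances;

public class DeleteWithMissingClassDemo {
  public static void main(String[] args) throws Exception {
    // Load a dataset and declare which attribute is the class
    // (deleteWithMissingClass() requires the class index to be set).
    Instances data = new Instances(new FileReader("training.arff"));
    data.setClassIndex(data.numAttributes() - 1);

    // Work on a copy so the original data is left untouched,
    // as most of the examples below do.
    Instances train = new Instances(data);
    train.deleteWithMissingClass();   // drop instances with a missing class value

    // Train any classifier on the cleaned data; J48 is just an example.
    J48 tree = new J48();
    tree.buildClassifier(train);
  }
}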
Example 1
Source File: ConsistencySubsetEval.java From tsml with GNU General Public License v3.0
/**
 * Generates a attribute evaluator. Has to initialize all fields of the
 * evaluator that are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator (Instances data) throws Exception {
  // can evaluator handle data?
  getCapabilities().testWithFail(data);

  m_trainInstances = new Instances(data);
  m_trainInstances.deleteWithMissingClass();
  m_classIndex = m_trainInstances.classIndex();
  m_numAttribs = m_trainInstances.numAttributes();
  m_numInstances = m_trainInstances.numInstances();

  m_disTransform = new Discretize();
  m_disTransform.setUseBetterEncoding(true);
  m_disTransform.setInputFormat(m_trainInstances);
  m_trainInstances = Filter.useFilter(m_trainInstances, m_disTransform);
}
Example 2
Source File: MIWrapper.java From tsml with GNU General Public License v3.0
/**
 * Builds the classifier
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  if (m_Classifier == null) {
    throw new Exception("A base classifier has not been specified!");
  }

  if (getDebug())
    System.out.println("Start training ...");
  m_NumClasses = train.numClasses();

  //convert the training dataset into single-instance dataset
  m_ConvertToProp.setWeightMethod(getWeightMethod());
  m_ConvertToProp.setInputFormat(train);
  train = Filter.useFilter(train, m_ConvertToProp);
  train.deleteAttributeAt(0); // remove the bag index attribute

  m_Classifier.buildClassifier(train);
}
Example 3
Source File: RandomCommittee.java From tsml with GNU General Public License v3.0
/**
 * Builds the committee of randomizable classifiers.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  m_data = new Instances(data);
  m_data.deleteWithMissingClass();
  super.buildClassifier(m_data);

  if (!(m_Classifier instanceof Randomizable)) {
    throw new IllegalArgumentException("Base learner must implement Randomizable!");
  }

  m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, m_NumIterations);

  Random random = m_data.getRandomNumberGenerator(m_Seed);

  // Resample data based on weights if base learner can't handle weights
  if (!(m_Classifier instanceof WeightedInstancesHandler)) {
    m_data = m_data.resampleWithWeights(random);
  }

  for (int j = 0; j < m_Classifiers.length; j++) {

    // Set the random number seed for the current classifier.
    ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());

    // Build the classifier.
    // m_Classifiers[j].buildClassifier(m_data);
  }

  buildClassifiers();

  // save memory
  m_data = null;
}
Example 4
Source File: Vote.java From tsml with GNU General Public License v3.0
/**
 * Buildclassifier selects a classifier from the set of classifiers by
 * minimising error on the training data.
 *
 * @param data the training data to be used for generating the boosted
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {

  // remove instances with missing class
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();

  m_structure = new Instances(newData, 0);

  m_Random = new Random(getSeed());

  if (m_classifiersToLoad.size() > 0) {
    m_preBuiltClassifiers.clear();
    loadClassifiers(data);
    int index = 0;

    if (m_Classifiers.length == 1
        && m_Classifiers[0] instanceof weka.classifiers.rules.ZeroR) {
      // remove the single ZeroR
      m_Classifiers = new Classifier[0];
    }
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  for (int i = 0; i < m_Classifiers.length; i++) {
    getClassifier(i).buildClassifier(newData);
  }
}
Example 5
Source File: Stacking.java From tsml with GNU General Public License v3.0
/**
 * Buildclassifier selects a classifier from the set of classifiers
 * by minimising error on the training data.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  if (m_MetaClassifier == null) {
    throw new IllegalArgumentException("No meta classifier has been set");
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances newData = new Instances(data);
  m_BaseFormat = new Instances(data, 0);
  newData.deleteWithMissingClass();

  Random random = new Random(m_Seed);
  newData.randomize(random);
  if (newData.classAttribute().isNominal()) {
    newData.stratify(m_NumFolds);
  }

  // Create meta level
  generateMetaLevel(newData, random);

  // restart the executor pool because at the end of processing
  // a set of classifiers it gets shutdown to prevent the program
  // executing as a server
  super.buildClassifier(newData);

  // Rebuild all the base classifiers on the full training data
  buildClassifiers(newData);
}
Example 6
Source File: Ridor.java From tsml with GNU General Public License v3.0
/**
 * Builds a single rule learner with REP dealing with 2 classes.
 * This rule learner always tries to predict the class with label
 * m_Class.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal())
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  if(instances.numClasses() != 2)
    throw new Exception(" Only 2 classes, please.");

  Instances data = new Instances(instances);
  if(Utils.eq(data.sumOfWeights(),0))
    throw new Exception(" No training data.");

  data.deleteWithMissingClass();
  if(Utils.eq(data.sumOfWeights(),0))
    throw new Exception(" The class labels of all the training data are missing.");

  if(data.numInstances() < m_Folds)
    throw new Exception(" Not enough data for REP.");

  m_Antds = new FastVector();

  /* Split data into Grow and Prune*/
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random);
  Instances pruneData=data.testCV(m_Folds, m_Folds-1);

  grow(growData);      // Build this rule
  prune(pruneData);    // Prune this rule
}
Example 7
Source File: LMT.java From tsml with GNU General Public License v3.0
/**
 * Builds the classifier.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception{

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances filteredData = new Instances(data);
  filteredData.deleteWithMissingClass();

  //replace missing values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(filteredData);
  filteredData = Filter.useFilter(filteredData, m_replaceMissing);

  //possibly convert nominal attributes globally
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(filteredData);
    filteredData = Filter.useFilter(filteredData, m_nominalToBinary);
  }

  int minNumInstances = 2;

  //create ModelSelection object, either for splits on the residuals or for splits on the class value
  ModelSelection modSelection;
  if (m_splitOnResiduals) {
    modSelection = new ResidualModelSelection(minNumInstances);
  } else {
    modSelection = new C45ModelSelection(minNumInstances, filteredData, true);
  }

  //create tree root
  m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression,
                       m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC);
  //build tree
  m_tree.buildClassifier(filteredData);

  if (modSelection instanceof C45ModelSelection)
    ((C45ModelSelection)modSelection).cleanup();
}
Example 8
Source File: DMNBtext.java From tsml with GNU General Public License v3.0
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances instances = new Instances(data);
  instances.deleteWithMissingClass();

  m_binaryClassifiers = new DNBBinary[instances.numClasses()];
  m_numClasses=instances.numClasses();
  m_headerInfo = new Instances(instances, 0);
  for (int i = 0; i < instances.numClasses(); i++) {
    m_binaryClassifiers[i] = new DNBBinary();
    m_binaryClassifiers[i].setTargetClass(i);
    m_binaryClassifiers[i].initClassifier(instances);
  }

  if (instances.numInstances() == 0)
    return;

  //Iterative update
  Random random = new Random();
  for (int it = 0; it < m_NumIterations; it++) {
    for (int i = 0; i < instances.numInstances(); i++) {
      updateClassifier(instances.instance(i));
    }
  }

  // Utils.normalize(m_oldClassDis);
  // Utils.normalize(m_ClassDis);
  // m_originalPositive = m_oldClassDis[0];
  // m_positive = m_ClassDis[0];
}
Example 9
Source File: NSR.java From meka with GNU General Public License v3.0
public Instances convertInstances(Instances D, int L) throws Exception {

  //Gather combinations
  HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D);
  if(getDebug())
    System.out.println("Found "+distinctCombinations.size()+" unique combinations");

  //Prune combinations
  MLUtils.pruneCountHashMap(distinctCombinations,m_P);
  if(getDebug())
    System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P);

  // Remove all class attributes
  Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L));
  // Add a new class attribute
  D_.insertAttributeAt(new Attribute("CLASS", new ArrayList(distinctCombinations.keySet())),0); // create the class attribute
  D_.setClassIndex(0);

  //Add class values
  for (int i = 0; i < D.numInstances(); i++) {
    String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L));
    // add it
    if(distinctCombinations.containsKey(y))  //if its class value exists
      D_.instance(i).setClassValue(y);
    // decomp
    else if(m_N > 0) {
      String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N);
      for (String s : d_subsets) {
        int w = distinctCombinations.get(s);
        Instance copy = (Instance)(D_.instance(i)).copy();
        copy.setClassValue(s);
        copy.setWeight(1.0 / d_subsets.length);
        D_.add(copy);
      }
    }
  }

  // remove with missing class
  D_.deleteWithMissingClass();

  // keep the header of new dataset for classification
  m_InstancesTemplate = new Instances(D_, 0);

  if (getDebug()) System.out.println(""+D_);

  return D_;
}
Example 10
Source File: RaceSearch.java From tsml with GNU General Public License v3.0
/**
 * Searches the attribute subset space by racing cross validation
 * errors of competing subsets
 *
 * @param ASEval the attribute evaluator to guide the search
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed
 */
public int[] search (ASEvaluation ASEval, Instances data) throws Exception {
  if (!(ASEval instanceof SubsetEvaluator)) {
    throw new Exception(ASEval.getClass().getName() + " is not a "
                        + "Subset evaluator! (RaceSearch)");
  }

  if (ASEval instanceof UnsupervisedSubsetEvaluator) {
    throw new Exception("Can't use an unsupervised subset evaluator "
                        +"(RaceSearch).");
  }

  if (!(ASEval instanceof HoldOutSubsetEvaluator)) {
    throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "
                        +"weka.attributeSelection.ClassifierSubsetEval "
                        +"(RaceSearch)");
  }

  if (!(ASEval instanceof ErrorBasedMeritEvaluator)) {
    throw new Exception("Only error based subset evaluators can be used, "
                        +"eg. weka.attributeSelection.ClassifierSubsetEval "
                        +"(RaceSearch)");
  }

  m_Instances = new Instances(data);
  m_Instances.deleteWithMissingClass();
  if (m_Instances.numInstances() == 0) {
    throw new Exception("All train instances have missing class! (RaceSearch)");
  }

  if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) {
    throw new Exception("More attributes requested than exist in the data "
                        +"(RaceSearch).");
  }

  m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;
  m_numAttribs = m_Instances.numAttributes();
  m_classIndex = m_Instances.classIndex();

  if (m_rankingRequested) {
    m_rankedAtts = new double[m_numAttribs-1][2];
    m_rankedSoFar = 0;
  }

  if (m_xvalType == LEAVE_ONE_OUT) {
    m_numFolds = m_Instances.numInstances();
  } else {
    m_numFolds = 10;
  }

  Random random = new Random(1); // I guess this should really be a parameter?
  m_Instances.randomize(random);
  int [] bestSubset=null;

  switch (m_raceType) {
    case FORWARD_RACE:
    case BACKWARD_RACE:
      bestSubset = hillclimbRace(m_Instances, random);
      break;
    case SCHEMATA_RACE:
      bestSubset = schemataRace(m_Instances, random);
      break;
    case RANK_RACE:
      bestSubset = rankRace(m_Instances, random);
      break;
  }

  return bestSubset;
}
Example 11
Source File: MajorityConfidenceVote.java From AILibs with GNU Affero General Public License v3.0
/**
 * Builds the ensemble by assessing the classifier weights using a cross
 * validation of each classifier of the ensemble and then training the
 * classifiers using the complete <code>data</code>.
 *
 * @param data
 *            Training instances
 */
@Override
public void buildClassifier(final Instances data) throws Exception {
  this.classifierWeights = new double[this.m_Classifiers.length];

  // remove instances with missing class
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();
  this.m_structure = new Instances(newData, 0);

  // can classifier handle the data?
  this.getCapabilities().testWithFail(data);

  for (int i = 0; i < this.m_Classifiers.length; i++) {
    if (Thread.currentThread().isInterrupted()) {
      throw new InterruptedException();
    }

    // Perform cross validation to determine the classifier weights
    for (int n = 0; n < this.numFolds; n++) {
      Instances train = data.trainCV(this.numFolds, n, new Random(this.seed));
      Instances test = data.testCV(this.numFolds, n);

      this.getClassifier(i).buildClassifier(train);
      Evaluation eval = new Evaluation(train);
      eval.evaluateModel(this.getClassifier(i), test);
      this.classifierWeights[i] += eval.pctCorrect() / 100d;
    }

    this.classifierWeights[i] = Math.pow(this.classifierWeights[i], 2);
    this.classifierWeights[i] /= this.numFolds;

    this.getClassifier(i).buildClassifier(newData);
  }

  // If no classifier predicted something correctly, assume uniform distribution
  if (Arrays.stream(this.classifierWeights).allMatch(d -> d < 0.000001d)) {
    for (int i = 0; i < this.classifierWeights.length; i++) {
      this.classifierWeights[i] = 1d / this.classifierWeights.length;
    }
  }
}
Example 12
Source File: OneR.java From tsml with GNU General Public License v3.0
/**
 * Generates the classifier.
 *
 * @param instances the instances to be used for building the classifier
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  boolean noRule = true;

  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();

  // only class? -> build ZeroR model
  if (data.numAttributes() == 1) {
    System.err.println(
        "Cannot build model (only class attribute present in data!), "
        + "using ZeroR model instead!");
    m_ZeroR = new weka.classifiers.rules.ZeroR();
    m_ZeroR.buildClassifier(data);
    return;
  } else {
    m_ZeroR = null;
  }

  // for each attribute ...
  Enumeration enu = instances.enumerateAttributes();
  while (enu.hasMoreElements()) {
    try {
      OneRRule r = newRule((Attribute) enu.nextElement(), data);

      // if this attribute is the best so far, replace the rule
      if (noRule || r.m_correct > m_rule.m_correct) {
        m_rule = r;
      }
      noRule = false;
    } catch (Exception ex) {
    }
  }

  if (noRule)
    throw new WekaException("No attributes found to work with!");
}
Example 13
Source File: ConjunctiveRule.java From tsml with GNU General Public License v3.0
/**
 * Builds a single rule learner with REP dealing with nominal classes or
 * numeric classes.
 * For nominal classes, this rule learner predicts a distribution on
 * the classes.
 * For numeric classes, this learner predicts a single value.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();

  if(data.numInstances() < m_Folds)
    throw new Exception("Not enough data for REP.");

  m_ClassAttribute = data.classAttribute();
  if(m_ClassAttribute.isNominal())
    m_NumClasses = m_ClassAttribute.numValues();
  else
    m_NumClasses = 1;

  m_Antds = new FastVector();
  m_DefDstr = new double[m_NumClasses];
  m_Cnsqt = new double[m_NumClasses];
  m_Targets = new FastVector();
  m_Random = new Random(m_Seed);

  if(m_NumAntds != -1){
    grow(data);
  }
  else{
    data.randomize(m_Random);

    // Split data into Grow and Prune
    data.stratify(m_Folds);

    Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random);
    Instances pruneData=data.testCV(m_Folds, m_Folds-1);

    grow(growData);      // Build this rule
    prune(pruneData);    // Prune this rule
  }

  if(m_ClassAttribute.isNominal()){
    Utils.normalize(m_Cnsqt);
    if(Utils.gr(Utils.sum(m_DefDstr), 0))
      Utils.normalize(m_DefDstr);
  }
}
Example 14
Source File: Ridor.java From tsml with GNU General Public License v3.0
/**
 * Builds a ripple-down manner rule learner.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();

  int numCl = data.numClasses();
  m_Root = new Ridor_node();
  m_Class = instances.classAttribute();   // The original class label

  int index = data.classIndex();
  m_Cover = data.sumOfWeights();

  m_Random = new Random(m_Seed);

  /* Create a binary attribute */
  FastVector binary_values = new FastVector(2);
  binary_values.addElement("otherClasses");
  binary_values.addElement("defClass");
  Attribute attr = new Attribute ("newClass", binary_values);
  data.insertAttributeAt(attr, index);
  data.setClassIndex(index);              // The new class label

  /* Partition the data into bags according to their original class values */
  Instances[] dataByClass = new Instances[numCl];
  for(int i=0; i < numCl; i++)
    dataByClass[i] = new Instances(data, data.numInstances()); // Empty bags
  for(int i=0; i < data.numInstances(); i++){ // Partitioning
    Instance inst = data.instance(i);
    inst.setClassValue(0);           // Set new class vaue to be 0
    dataByClass[(int)inst.value(index+1)].add(inst);
  }

  for(int i=0; i < numCl; i++)
    dataByClass[i].deleteAttributeAt(index+1);   // Delete original class

  m_Root.findRules(dataByClass, 0);
}
Example 15
Source File: FT.java From tsml with GNU General Public License v3.0
/**
 * Builds the classifier.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception{

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances filteredData = new Instances(data);
  filteredData.deleteWithMissingClass();

  //replace missing values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(filteredData);
  filteredData = Filter.useFilter(filteredData, m_replaceMissing);

  //possibly convert nominal attributes globally
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(filteredData);
    filteredData = Filter.useFilter(filteredData, m_nominalToBinary);
  }

  int minNumInstances = 2;

  //create a FT tree root
  if (m_modelType==0)
    m_tree = new FTNode( m_errorOnProbabilities, m_numBoostingIterations,
                         m_minNumInstances, m_weightTrimBeta, m_useAIC);

  //create a FTLeaves tree root
  if (m_modelType==1){
    m_tree = new FTLeavesNode(m_errorOnProbabilities, m_numBoostingIterations,
                              m_minNumInstances, m_weightTrimBeta, m_useAIC);
  }
  //create a FTInner tree root
  if (m_modelType==2)
    m_tree = new FTInnerNode(m_errorOnProbabilities, m_numBoostingIterations,
                             m_minNumInstances, m_weightTrimBeta, m_useAIC);

  //build tree
  m_tree.buildClassifier(filteredData);
  // prune tree
  m_tree.prune();
  m_tree.assignIDs(0);
  m_tree.cleanup();
}
Example 16
Source File: AttributeSelectedClassifier.java From tsml with GNU General Public License v3.0
/**
 * Build the classifier on the dimensionally reduced data.
 *
 * @param data the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
  if (m_Classifier == null) {
    throw new Exception("No base classifier has been set!");
  }

  if (m_Evaluator == null) {
    throw new Exception("No attribute evaluator has been set!");
  }

  if (m_Search == null) {
    throw new Exception("No search method has been set!");
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();

  if (newData.numInstances() == 0) {
    m_Classifier.buildClassifier(newData);
    return;
  }
  if (newData.classAttribute().isNominal()) {
    m_numClasses = newData.classAttribute().numValues();
  } else {
    m_numClasses = 1;
  }

  Instances resampledData = null;
  // check to see if training data has all equal weights
  double weight = newData.instance(0).weight();
  boolean ok = false;
  for (int i = 1; i < newData.numInstances(); i++) {
    if (newData.instance(i).weight() != weight) {
      ok = true;
      break;
    }
  }

  if (ok) {
    if (!(m_Evaluator instanceof WeightedInstancesHandler) ||
        !(m_Classifier instanceof WeightedInstancesHandler)) {
      Random r = new Random(1);
      for (int i = 0; i < 10; i++) {
        r.nextDouble();
      }
      resampledData = newData.resampleWithWeights(r);
    }
  } else {
    // all equal weights in the training data so just use as is
    resampledData = newData;
  }

  m_AttributeSelection = new AttributeSelection();
  m_AttributeSelection.setEvaluator(m_Evaluator);
  m_AttributeSelection.setSearch(m_Search);
  long start = System.currentTimeMillis();
  m_AttributeSelection.
    SelectAttributes((m_Evaluator instanceof WeightedInstancesHandler)
                     ? newData : resampledData);
  long end = System.currentTimeMillis();

  if (m_Classifier instanceof WeightedInstancesHandler) {
    newData = m_AttributeSelection.reduceDimensionality(newData);
    m_Classifier.buildClassifier(newData);
  } else {
    resampledData = m_AttributeSelection.reduceDimensionality(resampledData);
    m_Classifier.buildClassifier(resampledData);
  }

  long end2 = System.currentTimeMillis();
  m_numAttributesSelected = m_AttributeSelection.numberAttributesSelected();
  m_ReducedHeader =
    new Instances((m_Classifier instanceof WeightedInstancesHandler)
                  ? newData : resampledData, 0);
  m_selectionTime = (double)(end - start);
  m_totalTime = (double)(end2 - start);
}
Example 17
Source File: CVParameterSelection.java From tsml with GNU General Public License v3.0
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  Instances trainData = new Instances(instances);
  trainData.deleteWithMissingClass();

  if (!(m_Classifier instanceof OptionHandler)) {
    throw new IllegalArgumentException("Base classifier should be OptionHandler.");
  }
  m_InitOptions = ((OptionHandler)m_Classifier).getOptions();
  m_BestPerformance = -99;
  m_NumAttributes = trainData.numAttributes();
  Random random = new Random(m_Seed);
  trainData.randomize(random);
  m_TrainFoldSize = trainData.trainCV(m_NumFolds, 0).numInstances();

  // Check whether there are any parameters to optimize
  if (m_CVParams.size() == 0) {
    m_Classifier.buildClassifier(trainData);
    m_BestClassifierOptions = m_InitOptions;
    return;
  }

  if (trainData.classAttribute().isNominal()) {
    trainData.stratify(m_NumFolds);
  }
  m_BestClassifierOptions = null;

  // Set up m_ClassifierOptions -- take getOptions() and remove
  // those being optimised.
  m_ClassifierOptions = ((OptionHandler)m_Classifier).getOptions();
  for (int i = 0; i < m_CVParams.size(); i++) {
    Utils.getOption(((CVParameter)m_CVParams.elementAt(i)).m_ParamChar,
                    m_ClassifierOptions);
  }
  findParamsByCrossValidation(0, trainData, random);

  String [] options = (String [])m_BestClassifierOptions.clone();
  ((OptionHandler)m_Classifier).setOptions(options);
  m_Classifier.buildClassifier(trainData);
}
Example 18
Source File: RacedIncrementalLogitBoost.java From tsml with GNU General Public License v3.0
/**
 * performs a boosting iteration, returning a new model for the committee
 *
 * @param data the data to boost on
 * @return the new model
 * @throws Exception if anything goes wrong
 */
protected Classifier[] boost(Instances data) throws Exception {

  Classifier[] newModel = AbstractClassifier.makeCopies(m_Classifier, m_NumClasses);

  // Create a copy of the data with the class transformed into numeric
  Instances boostData = new Instances(data);
  boostData.deleteWithMissingClass();
  int numInstances = boostData.numInstances();

  // Temporarily unset the class index
  int classIndex = data.classIndex();
  boostData.setClassIndex(-1);
  boostData.deleteAttributeAt(classIndex);
  boostData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex);
  boostData.setClassIndex(classIndex);
  double [][] trainFs = new double [numInstances][m_NumClasses];
  double [][] trainYs = new double [numInstances][m_NumClasses];
  for (int j = 0; j < m_NumClasses; j++) {
    for (int i = 0, k = 0; i < numInstances; i++, k++) {
      while (data.instance(k).classIsMissing()) k++;
      trainYs[i][j] = (data.instance(k).classValue() == j) ? 1 : 0;
    }
  }

  // Evaluate / increment trainFs from the classifiers
  for (int x = 0; x < m_models.size(); x++) {
    for (int i = 0; i < numInstances; i++) {
      double [] pred = new double [m_NumClasses];
      double predSum = 0;
      Classifier[] model = (Classifier[]) m_models.elementAt(x);
      for (int j = 0; j < m_NumClasses; j++) {
        pred[j] = model[j].classifyInstance(boostData.instance(i));
        predSum += pred[j];
      }
      predSum /= m_NumClasses;
      for (int j = 0; j < m_NumClasses; j++) {
        trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses-1) / m_NumClasses;
      }
    }
  }

  for (int j = 0; j < m_NumClasses; j++) {

    // Set instance pseudoclass and weights
    for (int i = 0; i < numInstances; i++) {
      double p = RtoP(trainFs[i], j);
      Instance current = boostData.instance(i);
      double z, actual = trainYs[i][j];
      if (actual == 1) {
        z = 1.0 / p;
        if (z > Z_MAX) { // threshold
          z = Z_MAX;
        }
      } else if (actual == 0) {
        z = -1.0 / (1.0 - p);
        if (z < -Z_MAX) { // threshold
          z = -Z_MAX;
        }
      } else {
        z = (actual - p) / (p * (1 - p));
      }

      double w = (actual - p) / z;
      current.setValue(classIndex, z);
      current.setWeight(numInstances * w);
    }

    Instances trainData = boostData;
    if (m_UseResampling) {
      double[] weights = new double[boostData.numInstances()];
      for (int kk = 0; kk < weights.length; kk++) {
        weights[kk] = boostData.instance(kk).weight();
      }
      trainData = boostData.resampleWithWeights(m_RandomInstance, weights);
    }

    // Build the classifier
    newModel[j].buildClassifier(trainData);
  }

  return newModel;
}
Example 19
Source File: RandomSubSpace.java From tsml with GNU General Public License v3.0
/**
 * builds the classifier.
 *
 * @param data the training data to be used for generating the
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  m_data = new Instances(data);
  m_data.deleteWithMissingClass();

  // only class? -> build ZeroR model
  if (m_data.numAttributes() == 1) {
    System.err.println(
        "Cannot build model (only class attribute present in data!), "
        + "using ZeroR model instead!");
    m_ZeroR = new weka.classifiers.rules.ZeroR();
    m_ZeroR.buildClassifier(m_data);
    return;
  } else {
    m_ZeroR = null;
  }

  super.buildClassifier(data);

  Integer[] indices = new Integer[data.numAttributes()-1];
  int classIndex = data.classIndex();
  int offset = 0;
  for(int i = 0; i < indices.length+1; i++) {
    if (i != classIndex) {
      indices[offset++] = i+1;
    }
  }
  int subSpaceSize = numberOfAttributes(indices.length, getSubSpaceSize());
  Random random = data.getRandomNumberGenerator(m_Seed);

  for (int j = 0; j < m_Classifiers.length; j++) {
    if (m_Classifier instanceof Randomizable) {
      ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
    }
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(m_Classifiers[j]);
    m_Classifiers[j] = fc;
    Remove rm = new Remove();
    rm.setOptions(new String[]{"-V", "-R",
        randomSubSpace(indices,subSpaceSize,classIndex+1,random)});
    fc.setFilter(rm);

    // build the classifier
    //m_Classifiers[j].buildClassifier(m_data);
  }

  buildClassifiers();

  // save memory
  m_data = null;
}
Example 20
Source File: AdditiveRegression.java From tsml with GNU General Public License v3.0
/**
 * Build the classifier on the supplied data
 *
 * @param data the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  super.buildClassifier(data);

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();

  double sum = 0;
  double temp_sum = 0;
  // Add the model for the mean first
  m_zeroR = new ZeroR();
  m_zeroR.buildClassifier(newData);

  // only class? -> use only ZeroR model
  if (newData.numAttributes() == 1) {
    System.err.println(
        "Cannot build model (only class attribute present in data!), "
        + "using ZeroR model instead!");
    m_SuitableData = false;
    return;
  }
  else {
    m_SuitableData = true;
  }

  newData = residualReplace(newData, m_zeroR, false);
  for (int i = 0; i < newData.numInstances(); i++) {
    sum += newData.instance(i).weight() *
      newData.instance(i).classValue() * newData.instance(i).classValue();
  }
  if (m_Debug) {
    System.err.println("Sum of squared residuals "
                       +"(predicting the mean) : " + sum);
  }

  m_NumIterationsPerformed = 0;
  do {
    temp_sum = sum;

    // Build the classifier
    m_Classifiers[m_NumIterationsPerformed].buildClassifier(newData);

    newData = residualReplace(newData, m_Classifiers[m_NumIterationsPerformed], true);
    sum = 0;
    for (int i = 0; i < newData.numInstances(); i++) {
      sum += newData.instance(i).weight() *
        newData.instance(i).classValue() * newData.instance(i).classValue();
    }
    if (m_Debug) {
      System.err.println("Sum of squared residuals : "+sum);
    }
    m_NumIterationsPerformed++;
  } while (((temp_sum - sum) > Utils.SMALL) &&
           (m_NumIterationsPerformed < m_Classifiers.length));
}