Java Code Examples for weka.classifiers.Classifier#buildClassifier()
The following examples show how to use
weka.classifiers.Classifier#buildClassifier().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: logistic_regression.java From CompetitiveJava with MIT License | 6 votes |
/**
 * Builds a logistic-regression model on the training data, evaluates it on
 * the testing data and prints the evaluation summary, the fitted model and
 * the predicted class value of the last instance of the prediction data set.
 *
 * @throws Exception if obtaining a data set, building the classifier,
 *         evaluating it or classifying the instance fails
 */
public static void process() throws Exception {
    Instances trainingDataSet = getDataSet(TRAINING_DATA_SET_FILENAME);
    Instances testingDataSet = getDataSet(TESTING_DATA_SET_FILENAME);

    // The classifier is Weka's Logistic (logistic regression), not linear regression.
    Classifier classifier = new weka.classifiers.functions.Logistic();
    classifier.buildClassifier(trainingDataSet);

    // Evaluate the trained model on the testing data. Without these two
    // statements 'eval' below is undeclared and the testing set is never used.
    weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(trainingDataSet);
    eval.evaluateModel(classifier, testingDataSet);

    // The banner text is kept unchanged so existing console output stays the same.
    System.out.println("** Linear Regression Evaluation with Datasets **");
    System.out.println(eval.toSummaryString());
    System.out.print(" the expression for the input data as per alogorithm is ");
    System.out.println(classifier);

    // Predict the class value of the last instance in the prediction data set.
    Instance instanceToPredict = getDataSet(PREDICTION_DATA_SET_FILENAME).lastInstance();
    double value = classifier.classifyInstance(instanceToPredict);

    System.out.println(value);
}
Example 2
Source File: BOSSC45.java From tsml with GNU General Public License v3.0 | 6 votes |
public static void main(String[] args) throws Exception{ //Minimum working example String dataset = "BeetleFly"; Instances train = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dataset+"\\"+dataset+"_TRAIN.arff"); Instances test = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dataset+"\\"+dataset+"_TEST.arff"); Classifier c = new BOSSC45(); c.buildClassifier(train); double accuracy = ClassifierTools.accuracy(test, c); System.out.println("BOSSC45 accuracy on " + dataset + " fold 0 = " + accuracy); //Other examples/tests // detailedFold0Test(dataset); // resampleTest(dataset, 5); }
Example 3
Source File: AllPairsTable.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Fills the pairwise separability table. For every two-element subset of the
 * classes actually contained in the training data, the classifier is built on
 * the training instances of those two classes and evaluated on the validation
 * instances of the same two classes; {@code pctCorrect() / 100} is stored
 * under the (alphabetically smaller, larger) class name pair. Finally the
 * per-class instance counts and the total size of the training data are
 * recorded.
 *
 * @param training data used to build the classifier for each pair
 * @param validation data used to evaluate the classifier for each pair
 * @param c classifier that is rebuilt for every pair
 * @throws Exception if building or evaluating the classifier fails
 */
public AllPairsTable(final Instances training, final Instances validation, final Classifier c) throws Exception {
    final Collection<String> presentClasses = WekaUtil.getClassesActuallyContainedInDataset(training);
    for (Collection<String> classPair : SetUtil.getAllPossibleSubsetsWithSize(presentClasses, 2)) {
        final List<String> ordered = classPair.stream().sorted().collect(Collectors.toList());
        final String first = ordered.get(0);
        final String second = ordered.get(1);

        // Train on the instances of the two classes only.
        final Instances pairTraining = WekaUtil.getInstancesOfClass(training, first);
        pairTraining.addAll(WekaUtil.getInstancesOfClass(training, second));
        c.buildClassifier(pairTraining);

        // Validate on the instances of the same two classes.
        final Instances pairValidation = WekaUtil.getInstancesOfClass(validation, first);
        pairValidation.addAll(WekaUtil.getInstancesOfClass(validation, second));
        final Evaluation pairEval = new Evaluation(pairTraining);
        pairEval.evaluateModel(c, pairValidation);

        this.separabilities.computeIfAbsent(first, key -> new HashMap<>()).put(second, pairEval.pctCorrect() / 100);
    }
    this.classCount = WekaUtil.getNumberOfInstancesPerClass(training);
    this.sum = training.size();
}
Example 4
Source File: WekaUtilTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * For every classifier in the portfolio: stratifies the vowel data set for
 * 10 folds, checks that the train and test parts of fold 0 together contain
 * as many instances as the full data set, then cross-validates the
 * classifier, builds it on the train part, evaluates it on the test part and
 * prints the percentage of correctly classified instances.
 *
 * @throws Exception if the data cannot be read or a classifier fails
 */
@Test
public void checkSplit() throws Exception {
    Instances inst;
    // Instances(Reader) reads the complete ARFF content in its constructor,
    // so the reader can (and should) be closed immediately afterwards.
    try (BufferedReader reader = new BufferedReader(new FileReader(VOWEL_ARFF))) {
        inst = new Instances(reader);
    }
    inst.setClassIndex(inst.numAttributes() - 1);

    for (Classifier c : this.portfolio) {

        /* eval for CV */
        inst.stratify(10);
        Instances train = inst.trainCV(10, 0);
        Instances test = inst.testCV(10, 0);
        // JUnit expects (expected, actual): the full data set size is the reference value.
        Assert.assertEquals(inst.size(), train.size() + test.size());

        // The same Evaluation object accumulates both the cross-validation
        // and the subsequent hold-out predictions, as in the original test.
        Evaluation eval = new Evaluation(train);
        eval.crossValidateModel(c, inst, 10, new Random(0));

        c.buildClassifier(train);
        eval.evaluateModel(c, test);
        System.out.println(eval.pctCorrect());
    }
}
Example 5
Source File: BasicBuildTests.java From tsml with GNU General Public License v3.0 | 6 votes |
public static void buildAllClassifiers(String[] problems, String[] classifiers, String path) { for(String str:problems){ System.out.println("Building all for problem "+str); Instances train = DatasetLoading.loadData(path+str+"\\"+str+"_TRAIN.arff"); Instances test = DatasetLoading.loadData(path+str+"\\"+str+"_TEST.arff"); for(String cls:classifiers){ System.out.print("\t Building "+cls+" .... "); Classifier c= ClassifierLists.setClassifierClassic(cls,0); try{ c.buildClassifier(train); System.out.print("Built successfully. Accuracy = "); double a=ClassifierTools.accuracy(test, c); System.out.println(a); }catch(Exception e){ System.out.println("Classifier failed to build with exception "+e); // e.printStackTrace(); } } } }
Example 6
Source File: TestUtil.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Runs a simple holdout experiment: splits the data, builds the classifier on
 * the train part and logs the evaluation on both the train and the test part.
 * The confusion matrix is logged as well unless the class attribute is numeric.
 *
 * @param clf Classifier
 * @param data Full dataset
 * @param p Split percentage
 * @throws Exception if splitting, building or evaluating fails
 */
public static void holdout(Classifier clf, Instances data, double p) throws Exception {
    final Instances[] parts = splitTrainTest(data, p);
    final Instances trainPart = parts[0];
    final Instances testPart = parts[1];

    logger.info("Classifier: \n{}", clf.toString());
    clf.buildClassifier(trainPart);

    // Evaluation on the data the classifier was built on.
    final Evaluation onTrain = new Evaluation(trainPart);
    onTrain.evaluateModel(clf, trainPart);
    logger.info("Weka Train Evaluation:");
    logger.info(onTrain.toSummaryString());
    if (!data.classAttribute().isNumeric()) {
        logger.info(onTrain.toMatrixString());
    }

    // Evaluation on the held-out data.
    final Evaluation onTest = new Evaluation(trainPart);
    logger.info("Weka Test Evaluation:");
    onTest.evaluateModel(clf, testPart);
    logger.info(onTest.toSummaryString());
    if (!data.classAttribute().isNumeric()) {
        logger.info(onTest.toMatrixString());
    }
}
Example 7
Source File: BOSSSpatialPyramids_BD.java From tsml with GNU General Public License v3.0 | 5 votes |
public static void resampleTest(String dset, int resamples) throws Exception { Instances train = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dset+"\\"+dset+"_TRAIN.arff"); Instances test = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dset+"\\"+dset+"_TEST.arff"); Classifier c = new BOSSSpatialPyramids_BD(); //c.setCVPath("C:\\tempproject\\BOSSEnsembleCVtest.csv"); double [] accs = new double[resamples]; for(int i=0;i<resamples;i++){ Instances[] data=InstanceTools.resampleTrainAndTestInstances(train, test, i); c.buildClassifier(data[0]); accs[i]= ClassifierTools.accuracy(data[1], c); if (i==0) System.out.print(accs[i]); else System.out.print("," + accs[i]); } double mean = 0; for(int i=0;i<resamples;i++) mean += accs[i]; mean/=resamples; System.out.println("\n\nBOSSEnsembleSP mean acc over " + resamples + " resamples: " + mean); }
Example 8
Source File: BOSSC45.java From tsml with GNU General Public License v3.0 | 5 votes |
public static void resampleTest(String dset, int resamples) throws Exception { Instances train = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dset+"\\"+dset+"_TRAIN.arff"); Instances test = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dset+"\\"+dset+"_TEST.arff"); Classifier c = new BOSSC45(); //c.setCVPath("C:\\tempproject\\BOSSEnsembleCVtest.csv"); double [] accs = new double[resamples]; for(int i=0;i<resamples;i++){ Instances[] data=InstanceTools.resampleTrainAndTestInstances(train, test, i); c.buildClassifier(data[0]); accs[i]= ClassifierTools.accuracy(data[1], c); if (i==0) System.out.print(accs[i]); else System.out.print("," + accs[i]); } double mean = 0; for(int i=0;i<resamples;i++) mean += accs[i]; mean/=resamples; System.out.println("\n\nBOSSEnsemble mean acc over " + resamples + " resamples: " + mean); }
Example 9
Source File: SpatialBOSS.java From tsml with GNU General Public License v3.0 | 5 votes |
public static void main(String[] args) throws Exception{ // Experiments.ExperimentalArguments exp = new Experiments.ExperimentalArguments(); // exp.dataReadLocation = "C:/TSCProblems2018/"; // exp.resultsWriteLocation = "C:/Temp/spatialboss/"; // exp.classifierName = "SpatialBOSS"; // exp.datasetName = "Arrowhead"; // exp.generateErrorEstimateOnTrainSet = true; // exp.forceEvaluation = true; // exp.foldId = 1; // // Experiments.setupAndRunExperiment(exp); //Minimum working example String dataset = "ItalyPowerDemand"; Instances train = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dataset+"\\"+dataset+"_TRAIN.arff"); Instances test = DatasetLoading.loadDataNullable("C:\\TSC Problems\\"+dataset+"\\"+dataset+"_TEST.arff"); Classifier c = new SpatialBOSS(); c.buildClassifier(train); double accuracy = ClassifierTools.accuracy(test, c); System.out.println("BOSSEnsembleSP accuracy on " + dataset + " fold 0 = " + accuracy); //Other examples/tests // detailedFold0Test(dataset); // resampleTest(dataset, 25); }
Example 10
Source File: Chopper.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/**
 * Performs the actual building of the classifier: fetches the next
 * classifier and builds it on the new training set.
 *
 * @throws Exception if building fails
 */
@Override
protected void buildClassifier() throws Exception {
    final Classifier next = getNextClassifier();

    if (getDebug()) {
        final String className = next.getClass().getName();
        System.out.println("buildClassifier: " + m_CurrentClassifierIndex + ". " + className);
    }

    next.buildClassifier(m_TrainsetNew);
}
Example 11
Source File: RelExTool.java From Criteria2Query with Apache License 2.0 | 5 votes |
/**
 * Builds a RandomForest on the data set read from an ARFF file and hands the
 * built model to {@code saveModel}.
 *
 * @param trainfile path of the ARFF file with the training data; the
 *        attribute at index 6 is used as the class
 * @param modelpath passed through to {@code saveModel} together with the model
 * @throws Exception if reading the data, building or saving the model fails
 */
public void trainClassifier(String trainfile,String modelpath) throws Exception{
    final ArffLoader loader = new ArffLoader();
    loader.setFile(new File(trainfile));

    final Instances trainingData = loader.getDataSet();
    trainingData.setClassIndex(6);

    final Classifier forest = new RandomForest();
    forest.buildClassifier(trainingData);
    saveModel(forest, modelpath);
}
Example 12
Source File: Weighting.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/**
 * Performs the actual building of the classifier: fetches the next
 * classifier and builds it on the new training set.
 *
 * @throws Exception if building fails
 */
@Override
protected void buildClassifier() throws Exception {
    final Classifier next = getNextClassifier();

    if (getDebug()) {
        final String className = next.getClass().getName();
        System.out.println("buildClassifier: " + m_CurrentClassifierIndex + ". " + className);
    }

    next.buildClassifier(m_TrainsetNew);
}
Example 13
Source File: SimulationExperiments.java From tsml with GNU General Public License v3.0 | 4 votes |
public static void dictionarySimulatorThreadExperiment() throws Exception { Model.setDefaultSigma(1); boolean overwrite=false; int experiments=1; for(int seriesLength=300;seriesLength<=300;seriesLength+=300) { int[] casesPerClass = new int[2]; casesPerClass[0] = casesPerClass[1] = 100; int[] shapesPerClass = new int[]{5, 20}; double[] acc = new double[4]; long[] trainTime = new long[4]; long[] testTime = new long[4]; long[] mem = new long[4]; long t1, t2; String[] classifierNames = {"BOSS"};//, "cBOSS", "SpatialBOSS", "WEASEL"}; MemoryMXBean mx= ManagementFactory.getMemoryMXBean(); Notification notif; /* GarbageCollectorMXBean gc=mx. // receive the notification emitted by a GarbageCollectorMXBean and set to notif synchronized (mx){ mx.wait(); } notif=mx.get String notifType = "TESTY"; //notif.getType(); if (notifType.equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) { // retrieve the garbage collection notification information CompositeData cd = (CompositeData) notif.getUserData(); GarbageCollectionNotificationInfo info = GarbageCollectionNotificationInfo.from(cd); } */ for (int i = 0; i < experiments; i++) { Instances data = SimulateDictionaryData.generateDictionaryData(seriesLength, casesPerClass, shapesPerClass); Instances[] split = InstanceTools.resampleInstances(data, i, 0.2); System.out.println(" Testing thread model: series length =" + seriesLength + " Experiment Index" + i + " Train size =" + split[0].numInstances() + " test size =" + split[1].numInstances()); for (int j = 0; j < classifierNames.length; j++) { System.gc(); long memoryBefore = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); Classifier c = ClassifierLists.setClassifierClassic(classifierNames[j], i); t1 = System.nanoTime(); c.buildClassifier(split[0]); trainTime[j] = System.nanoTime() - t1; t1 = System.nanoTime(); acc[j] = ClassifierTools.accuracy(split[1], c); testTime[j] = System.nanoTime() - t1; System.gc(); mem[j] = 
Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory() - memoryBefore; System.out.println("\t" + classifierNames[j] + " ACC = " + acc[j] + " Train Time =" + trainTime[j] + " Test Time = " + testTime[j] + " Memory = " + mem[j]); } } } }
Example 14
Source File: SimulationExperiments.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Simulation experiment over increasing series lengths. For every series
 * length a result directory is created and, for every experiment index,
 * dictionary data is generated, resampled into train and test parts and each
 * listed classifier is built and evaluated. Test accuracy, train time, test
 * time and memory figures are printed and appended, one row per experiment,
 * to four CSV files in the result directory.
 *
 * @throws Exception if building a classifier or computing its accuracy fails
 */
public static void dictionarySimulatorChangingSeriesLength() throws Exception {
    Model.setDefaultSigma(1);
    boolean overwrite=true;
    int experiments=2;
    int numCases=2000;
    String writePath="Z:/Results Working Area/DictionaryBased/SimulationExperimentsMemMonitor/";
    for(int seriesLength=5000;seriesLength<=10000;seriesLength+=5000) {
        // One directory is used for mkdirs, the skip check AND the result
        // files. Previously the directory that was created and checked
        // ("Cases1000SeriesLength" + length) differed from the one written to.
        // NOTE(review): the write location is kept as the reference; confirm
        // that "DictionarySeriesLength" is the intended directory name.
        String outDir = writePath + "DictionarySeriesLength" + seriesLength;
        new File(outDir).mkdirs();

        String accPath = outDir + "/testAcc" + seriesLength + ".csv";
        String trainTimePath = outDir + "/trainTime" + seriesLength + ".csv";
        String testTimePath = outDir + "/testTime" + seriesLength + ".csv";
        String memPath = outDir + "/mem" + seriesLength + ".csv";

        if(!overwrite
                && new File(accPath).exists() && new File(trainTimePath).exists()
                && new File(testTimePath).exists() && new File(memPath).exists()) {
            System.out.println("SKIPPING series length = "+seriesLength+" as all already present");
            continue;
        }

        OutFile accFile = new OutFile(accPath);
        OutFile trainTimeFile = new OutFile(trainTimePath);
        OutFile testTimeFile = new OutFile(testTimePath);
        OutFile memFile = new OutFile(memPath);

        System.out.println(" Generating simulated data ....");
        int[] casesPerClass = new int[2];
        casesPerClass[0] = casesPerClass[1] = numCases/2;
        int[] shapesPerClass = new int[]{5, 20};
        long t1;
        String[] classifierNames = {"cBOSS","S-BOSS","WEASEL","BOSS"};
        double[] acc = new double[classifierNames.length];
        long[] trainTime = new long[classifierNames.length];
        long[] testTime = new long[classifierNames.length];
        long[] finalMem = new long[classifierNames.length];
        long[] maxMem = new long[classifierNames.length];

        for (int i = 0; i < experiments; i++) {
            Instances data = SimulateDictionaryData.generateDictionaryData(seriesLength, casesPerClass, shapesPerClass);
            Instances[] split = InstanceTools.resampleInstances(data, i, 0.2);
            System.out.println(" series length =" + seriesLength + " Experiment Index" + i + " Train size =" + split[0].numInstances() + " test size =" + split[1].numInstances());
            for (int j = 0; j < classifierNames.length; j++) {
                System.gc();
                MemoryMonitor monitor=new MemoryMonitor();
                monitor.installMonitor();
                long memoryBefore = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
                Classifier c = ClassifierLists.setClassifierClassic(classifierNames[j], i);

                t1 = System.nanoTime();
                c.buildClassifier(split[0]);
                trainTime[j] = System.nanoTime() - t1;

                t1 = System.nanoTime();
                acc[j] = ClassifierTools.accuracy(split[1], c);
                testTime[j] = System.nanoTime() - t1;

                System.gc();
                finalMem[j] = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory() - memoryBefore;
                maxMem[j]=monitor.getMaxMemoryUsed();
                System.out.println("\t" + classifierNames[j] + " ACC = " + acc[j] + " Train Time =" + trainTime[j] + " Test Time = " + testTime[j] + " Final Memory = " + finalMem[j]/1000000+" Max Memory ="+maxMem[j]/1000000);
            }

            // One CSV row per experiment: the index followed by one value per classifier.
            accFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                accFile.writeString("," + acc[j]);
            }
            accFile.writeString("\n");

            trainTimeFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                trainTimeFile.writeString("," + trainTime[j]);
            }
            trainTimeFile.writeString("\n");

            testTimeFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                testTimeFile.writeString("," + testTime[j]);
            }
            testTimeFile.writeString("\n");

            // Memory row: final memory per classifier, an empty column, then max memory per classifier.
            memFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                memFile.writeString("," + finalMem[j]);
            }
            memFile.writeString(",");
            for (int j = 0; j < classifierNames.length; j++) {
                memFile.writeString("," + maxMem[j]);
            }
            memFile.writeString("\n");
        }
    }
}
Example 15
Source File: SimulationExperiments.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Simulation experiment over increasing train sizes. For every train size a
 * result directory is created (and skipped when all four result files already
 * exist and overwriting is off). For every experiment index, dictionary data
 * is generated, resampled 50/50 into train and test parts and each listed
 * classifier is built and evaluated. Test accuracy, train time, test time and
 * memory figures are printed and appended, one row per experiment, to four
 * CSV files in the result directory.
 *
 * @throws Exception if building a classifier or computing its accuracy fails
 */
public static void dictionarySimulatorChangingTrainSize() throws Exception {
    Model.setDefaultSigma(1);
    boolean overwrite=false;
    int seriesLength = 1000;
    int experiments=2;
    String writePath="Z:/Results Working Area/DictionaryBased/SimulationExperimentsMemMonitor2/";
    for(int trainSize=500;trainSize<=10000;trainSize+=500) {
        String outDir = writePath + "DictionaryTrainSize" + trainSize;
        new File(outDir).mkdirs();

        String accPath = outDir + "/testAcc" + trainSize + ".csv";
        String trainTimePath = outDir + "/trainTime" + trainSize + ".csv";
        String testTimePath = outDir + "/testTime" + trainSize + ".csv";
        String memPath = outDir + "/mem" + trainSize + ".csv";

        if(!overwrite
                && new File(accPath).exists() && new File(trainTimePath).exists()
                && new File(testTimePath).exists() && new File(memPath).exists()) {
            System.out.println("SKIPPING train size = "+trainSize+" as all already present");
            continue;
        }

        OutFile accFile = new OutFile(accPath);
        OutFile trainTimeFile = new OutFile(trainTimePath);
        OutFile testTimeFile = new OutFile(testTimePath);
        OutFile memFile = new OutFile(memPath);

        System.out.println(" Generating simulated data for n ="+trainSize+" Series Length ="+seriesLength+" ....");
        int[] casesPerClass = new int[2];
        casesPerClass[0] = casesPerClass[1] = trainSize;
        int[] shapesPerClass = new int[]{5, 20};
        long t1;
        String[] classifierNames = {"cBOSS", "BOSS","WEASEL","S-BOSS"};
        double[] acc = new double[classifierNames.length];
        long[] trainTime = new long[classifierNames.length];
        long[] testTime = new long[classifierNames.length];
        long[] finalMem = new long[classifierNames.length];
        long[] maxMem = new long[classifierNames.length];

        for (int i = 0; i < experiments; i++) {
            // Generate with the configured series length so the data matches
            // what the log lines report; a literal 500 was passed here before.
            // NOTE(review): confirm 1000 (not 500) is the intended length.
            Instances data = SimulateDictionaryData.generateDictionaryData(seriesLength, casesPerClass, shapesPerClass);
            Instances[] split = InstanceTools.resampleInstances(data, i, 0.5);
            System.out.println("Series Length =" + seriesLength + " Experiment Index: " + i + " Train size =" + split[0].numInstances() + " test size =" + split[1].numInstances());
            for (int j = 0; j < classifierNames.length; j++) {
                System.gc();
                MemoryMonitor monitor=new MemoryMonitor();
                monitor.installMonitor();
                long memoryBefore = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
                Classifier c = ClassifierLists.setClassifierClassic(classifierNames[j], i);

                t1 = System.nanoTime();
                c.buildClassifier(split[0]);
                trainTime[j] = System.nanoTime() - t1;

                t1 = System.nanoTime();
                acc[j] = ClassifierTools.accuracy(split[1], c);
                testTime[j] = System.nanoTime() - t1;

                System.gc();
                finalMem[j] = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory() - memoryBefore;
                maxMem[j]=monitor.getMaxMemoryUsed();
                System.out.println("\t" + classifierNames[j] + " ACC = " + acc[j] + " Train Time =" + trainTime[j] + " Test Time = " + testTime[j] + " Final Memory = " + finalMem[j]/1000000+" Max Memory ="+maxMem[j]/1000000);
            }

            // One CSV row per experiment: the index followed by one value per classifier.
            accFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                accFile.writeString("," + acc[j]);
            }
            accFile.writeString("\n");

            trainTimeFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                trainTimeFile.writeString("," + trainTime[j]);
            }
            trainTimeFile.writeString("\n");

            testTimeFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                testTimeFile.writeString("," + testTime[j]);
            }
            testTimeFile.writeString("\n");

            // Memory row: final memory per classifier, an empty column, then max memory per classifier.
            memFile.writeString(i + "");
            for (int j = 0; j < classifierNames.length; j++) {
                memFile.writeString("," + finalMem[j]);
            }
            memFile.writeString(",");
            for (int j = 0; j < classifierNames.length; j++) {
                memFile.writeString("," + maxMem[j]);
            }
            memFile.writeString("\n");
        }
    }
}
Example 16
Source File: SimulationExperiments.java From tsml with GNU General Public License v3.0 | 4 votes |
public static double singleSampleExperiment(Instances train, Instances test, Classifier c, int sample,String preds){ double acc=0; OutFile p=new OutFile(preds+"/testFold"+sample+".csv"); // hack here to save internal CV for further ensembling // if(c instanceof TrainAccuracyEstimate) // ((TrainAccuracyEstimate)c).writeCVTrainToFile(preds+"/trainFold"+sample+".csv"); if(c instanceof SaveableEnsemble) ((SaveableEnsemble)c).saveResults(preds+"/internalCV_"+sample+".csv",preds+"/internalTestPreds_"+sample+".csv"); try{ c.buildClassifier(train); int[][] predictions=new int[test.numInstances()][2]; for(int j=0;j<test.numInstances();j++){ predictions[j][0]=(int)test.instance(j).classValue(); test.instance(j).setMissing(test.classIndex());//Just in case .... } for(int j=0;j<test.numInstances();j++) { predictions[j][1]=(int)c.classifyInstance(test.instance(j)); if(predictions[j][0]==predictions[j][1]) acc++; } acc/=test.numInstances(); String[] names=preds.split("/"); p.writeLine(names[names.length-1]+","+c.getClass().getName()+",test"); if(c instanceof EnhancedAbstractClassifier) p.writeLine(((EnhancedAbstractClassifier)c).getParameters()); else if(c instanceof SaveableEnsemble) p.writeLine(((SaveableEnsemble)c).getParameters()); else p.writeLine("NoParameterInfo"); p.writeLine(acc+""); for(int j=0;j<test.numInstances();j++){ p.writeString(predictions[j][0]+","+predictions[j][1]+","); double[] dist =c.distributionForInstance(test.instance(j)); for(double d:dist) p.writeString(","+d); p.writeString("\n"); } }catch(Exception e) { System.out.println(" Error ="+e+" in method simpleExperiment"+e); e.printStackTrace(); System.out.println(" TRAIN "+train.relationName()+" has "+train.numAttributes()+" attributes and "+train.numInstances()+" instances"); System.out.println(" TEST "+test.relationName()+" has "+test.numAttributes()+" attributes and "+test.numInstances()+" instances"); System.exit(0); } return acc; }
Example 17
Source File: CrossValidationExperiments.java From NLIWOD with GNU Affero General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff"); BufferedReader reader = new BufferedReader(new FileReader(datapath.toString())); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(6); ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" ); int seed = 133; // Change to 100 for leave-one-out CV int folds = 10; Random rand = new Random(seed); Instances randData = new Instances(data); randData.randomize(rand); float cv_ave_f = 0; for(int n=0; n < folds; n++){ Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); //Change to the Classifier of your choice CDN Classifier = new CDN(); Classifier.buildClassifier(train); float ave_p = 0; float ave_r = 0; for(int j = 0; j < test.size(); j++){ Instance ins = test.get(j); int k = 0; for(int l=0; l < data.size(); l++){ Instance tmp = data.get(l); if(tmp.toString().equals(ins.toString())){ k = l; } } double[] confidences = Classifier.distributionForInstance(ins); int argmax = -1; double max = -1; for(int i = 0; i < 6; i++){ if(confidences[i]>max){ max = confidences[i]; argmax = i; } } String sys2ask = systems.get(systems.size() - argmax -1); ave_p += Float.parseFloat(Utils.loadSystemP(sys2ask).get(k)); ave_r += Float.parseFloat(Utils.loadSystemR(sys2ask).get(k)); } double p = ave_p/test.size(); double r = ave_r/test.size(); double fmeasure = 0; if(p>0&&r>0){fmeasure = 2*p*r/(p + r);} System.out.println("macro F on fold " + n + ": " + fmeasure); cv_ave_f += fmeasure/folds; } System.out.println("macro F average: " + cv_ave_f); System.out.println('\n'); }
Example 18
Source File: Ensemble.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Builds every classifier contained in this ensemble on the same data, in
 * iteration order.
 *
 * @param data the data each member is built on
 * @throws Exception if building any member fails; remaining members are not built
 */
@Override
public void buildClassifier(final Instances data) throws Exception {
    for (final Classifier member : this) {
        member.buildClassifier(data);
    }
}
Example 19
Source File: SimulationExperiments.java From tsml with GNU General Public License v3.0 | 4 votes |
/** Runs a single fold experiment, saving all output. * * @param train * @param test * @param c * @param sample * @param preds * @return */ public static double singleSampleExperiment(Instances train, Instances test, Classifier c, int sample,String preds){ double acc=0; OutFile p=new OutFile(preds+"/testFold"+sample+".csv"); // hack here to save internal CV for further ensembling if(EnhancedAbstractClassifier.classifierAbleToEstimateOwnPerformance(c)) ((EnhancedAbstractClassifier)c).setEstimateOwnPerformance(true); if(c instanceof SaveableEnsemble) ((SaveableEnsemble)c).saveResults(preds+"/internalCV_"+sample+".csv",preds+"/internalTestPreds_"+sample+".csv"); try{ c.buildClassifier(train); if(EnhancedAbstractClassifier.classifierIsEstimatingOwnPerformance(c)) ((EnhancedAbstractClassifier)c).getTrainResults().writeFullResultsToFile(preds+"/trainFold"+sample+".csv"); int[][] predictions=new int[test.numInstances()][2]; for(int j=0;j<test.numInstances();j++){ predictions[j][0]=(int)test.instance(j).classValue(); test.instance(j).setMissing(test.classIndex());//Just in case .... 
} for(int j=0;j<test.numInstances();j++) { predictions[j][1]=(int)c.classifyInstance(test.instance(j)); if(predictions[j][0]==predictions[j][1]) acc++; } acc/=test.numInstances(); String[] names=preds.split("/"); p.writeLine(names[names.length-1]+","+c.getClass().getName()+",test"); if(c instanceof EnhancedAbstractClassifier) p.writeLine(((EnhancedAbstractClassifier)c).getParameters()); else if(c instanceof SaveableEnsemble) p.writeLine(((SaveableEnsemble)c).getParameters()); else p.writeLine("NoParameterInfo"); p.writeLine(acc+""); for(int j=0;j<test.numInstances();j++){ p.writeString(predictions[j][0]+","+predictions[j][1]+","); double[] dist =c.distributionForInstance(test.instance(j)); for(double d:dist) p.writeString(","+d); p.writeString("\n"); } }catch(Exception e) { System.out.println(" Error ="+e+" in method simpleExperiment"+e); e.printStackTrace(); System.out.println(" TRAIN "+train.relationName()+" has "+train.numAttributes()+" attributes and "+train.numInstances()+" instances"); System.out.println(" TEST "+test.relationName()+" has "+test.numAttributes()+" attributes and "+test.numInstances()+" instances"); System.exit(0); } return acc; }
Example 20
Source File: ClassificationExamples.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * * @param train: the standard train fold Instances from the archive * @param test: the standard test fold Instances from the archive * @param c: Classifier to evaluate * @param fold: integer to indicate which fold. Set to 0 to just use train/test * @param resultsPath: a string indicating where to store the results * @return the accuracy of c on fold for problem given in train/test * * NOTES: * 1. If the classifier is a SaveableEnsemble, then we save the internal cross * validation accuracy and the internal test predictions * 2. The output of the file testFold+fold+.csv is * Line 1: ProblemName,ClassifierName, train/test * Line 2: parameter information for final classifier, if it is available * Line 3: test accuracy * then each line is * Actual Class, Predicted Class, Class probabilities * * */ public static double singleClassifierAndFold(Instances train, Instances test, Classifier c, int fold,String resultsPath){ Instances[] data=InstanceTools.resampleTrainAndTestInstances(train, test, fold); double acc=0; int act; int pred; // Save internal info for ensembles if(c instanceof SaveableEnsemble) ((SaveableEnsemble)c).saveResults(resultsPath+"/internalCV_"+fold+".csv",resultsPath+"/internalTestPreds_"+fold+".csv"); try{ c.buildClassifier(data[0]); StringBuilder str = new StringBuilder(); DecimalFormat df=new DecimalFormat("##.######"); for(int j=0;j<data[1].numInstances();j++) { act=(int)data[1].instance(j).classValue(); double[] probs=c.distributionForInstance(data[1].instance(j)); pred=0; for(int i=1;i<probs.length;i++){ if(probs[i]>probs[pred]) pred=i; } if(act==pred) acc++; str.append(act); str.append(","); str.append(pred); str.append(",,"); for(double d:probs){ str.append(df.format(d)); str.append(","); } str.append("\n"); } acc/=data[1].numInstances(); OutFile p=new OutFile(resultsPath+"/testFold"+fold+".csv"); p.writeLine(train.relationName()+","+c.getClass().getName()+",test"); if(c instanceof EnhancedAbstractClassifier){ 
p.writeLine(((EnhancedAbstractClassifier)c).getParameters()); }else p.writeLine("No parameter info"); p.writeLine(acc+""); p.writeLine(str.toString()); }catch(Exception e) { System.out.println(" Error ="+e+" in method simpleExperiment"+e); e.printStackTrace(); System.out.println(" TRAIN "+train.relationName()+" has "+train.numAttributes()+" attributes and "+train.numInstances()+" instances"); System.out.println(" TEST "+test.relationName()+" has "+test.numAttributes()+" attributes"+test.numInstances()+" instances"); System.exit(0); } return acc; }