Java Code Examples for weka.classifiers.Evaluation#evaluateModel()
The following examples show how to use weka.classifiers.Evaluation#evaluateModel(). They are taken from open-source projects; you can go to the original project or source file by following the link above each example.
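Before the individual examples, here is a minimal sketch of the typical call pattern: build a classifier on training data, construct an Evaluation from that same training data (so class priors and related statistics are initialized from it), then call evaluateModel() with the held-out test set. The dataset path, the J48 classifier, and the two-thirds holdout split are illustrative placeholders, not taken from any of the examples below.

import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EvaluateModelSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset (placeholder path) and mark the last attribute as the class
        Instances data = new DataSource("path/to/dataset.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Simple holdout: first two thirds train, remaining third test
        int trainSize = (int) Math.round(data.numInstances() * 2 / 3.0);
        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, data.numInstances() - trainSize);

        // Train any classifier, then evaluate it on the held-out data
        J48 tree = new J48();
        tree.buildClassifier(train);
        Evaluation eval = new Evaluation(train); // priors come from the training set
        eval.evaluateModel(tree, test);
        System.out.println(eval.toSummaryString());
    }
}

For k-fold cross-validation, Evaluation also offers crossValidateModel(Classifier, Instances, int, Random), which several of the examples below reimplement manually.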
Example 1
Source File: TestWekaBayes.java From Java-Data-Analysis with MIT License
public static void main(String[] args) throws Exception {
//  ConverterUtils.DataSource source = new ConverterUtils.DataSource("data/AnonFruit.arff");
    DataSource source = new DataSource("data/AnonFruit.arff");
    Instances train = source.getDataSet();
    train.setClassIndex(3);  // target attribute: (Sweet)

    // build model
    NaiveBayes model = new NaiveBayes();
    model.buildClassifier(train);

    // use
    Instances test = train;
    Evaluation eval = new Evaluation(test);
    eval.evaluateModel(model, test);
    List<Prediction> predictions = eval.predictions();
    int k = 0;
    for (Instance instance : test) {
        double actual = instance.classValue();
        double prediction = eval.evaluateModelOnce(model, instance);
        System.out.printf("%2d.%4.0f%4.0f", ++k, actual, prediction);
        System.out.println(prediction != actual ? " *" : "");
    }
}
Example 2
Source File: TestUtil.java From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Perform simple holdout with a given percentage
 *
 * @param clf  Classifier
 * @param data Full dataset
 * @param p    Split percentage
 */
public static void holdout(Classifier clf, Instances data, double p) throws Exception {
    Instances[] split = splitTrainTest(data, p);
    Instances train = split[0];
    Instances test = split[1];

    logger.info("Classifier: \n{}", clf.toString());
    clf.buildClassifier(train);

    Evaluation trainEval = new Evaluation(train);
    trainEval.evaluateModel(clf, train);
    logger.info("Weka Train Evaluation:");
    logger.info(trainEval.toSummaryString());
    if (!data.classAttribute().isNumeric()) {
        logger.info(trainEval.toMatrixString());
    }

    Evaluation testEval = new Evaluation(train);
    logger.info("Weka Test Evaluation:");
    testEval.evaluateModel(clf, test);
    logger.info(testEval.toSummaryString());
    if (!data.classAttribute().isNumeric()) {
        logger.info(testEval.toMatrixString());
    }
}
Example 3
Source File: ReductionOptimizer.java From AILibs with GNU Affero General Public License v3.0
private int getLossForClassifier(final MCTreeNode tree, final Instances data) {
    this.completeTree(tree);
    synchronized (this) {
        /* now eval the tree */
        try {
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (int i = 0; i < 2; i++) {
                List<IWekaInstances> split = (WekaUtil.getStratifiedSplit(new WekaInstances(data), this.seed + i, .6f));
                tree.buildClassifier(split.get(0).getList());
                Evaluation eval = new Evaluation(data);
                eval.evaluateModel(tree, split.get(1).getList());
                stats.addValue(eval.pctIncorrect());
            }
            return (int) Math.round((stats.getMean() * 100));
        } catch (Exception e) {
            this.logger.error(LoggerUtil.getExceptionInfo(e));
            return Integer.MAX_VALUE;
        }
    }
}
Example 4
Source File: NBTreeNoSplit.java From tsml with GNU General Public License v3.0
/**
 * Utility method for fast 5-fold cross validation of a naive bayes model
 *
 * @param fullModel a <code>NaiveBayesUpdateable</code> value
 * @param trainingSet an <code>Instances</code> value
 * @param r a <code>Random</code> value
 * @return a <code>double</code> value
 * @exception Exception if an error occurs
 */
public static double crossValidate(NaiveBayesUpdateable fullModel,
                                   Instances trainingSet,
                                   Random r) throws Exception {
    // make some copies for fast evaluation of 5-fold xval
    Classifier[] copies = AbstractClassifier.makeCopies(fullModel, 5);
    Evaluation eval = new Evaluation(trainingSet);
    // make some splits
    for (int j = 0; j < 5; j++) {
        Instances test = trainingSet.testCV(5, j);
        // unlearn these test instances
        for (int k = 0; k < test.numInstances(); k++) {
            test.instance(k).setWeight(-test.instance(k).weight());
            ((NaiveBayesUpdateable) copies[j]).updateClassifier(test.instance(k));
            // reset the weight back to its original value
            test.instance(k).setWeight(-test.instance(k).weight());
        }
        eval.evaluateModel(copies[j], test);
    }
    return eval.incorrect();
}
Example 5
Source File: StabilityTest.java From wekaDeeplearning4j with GNU General Public License v3.0
public static void evaluate(Dl4jMlpClassifier clf, Instances data, double minPerfomance) throws Exception {
    Instances[] split = TestUtil.splitTrainTest(data);

    Instances train = split[0];
    Instances test = split[1];

    clf.buildClassifier(train);

    Evaluation trainEval = new Evaluation(train);
    trainEval.evaluateModel(clf, train);

    Evaluation testEval = new Evaluation(train);
    testEval.evaluateModel(clf, test);

    final double testPctCorrect = testEval.pctCorrect();
    final double trainPctCorrect = trainEval.pctCorrect();

    log.info("Train: {}, Test: {}", trainPctCorrect, testPctCorrect);
    boolean success = testPctCorrect > minPerfomance && trainPctCorrect > minPerfomance;
    log.info("Success: " + success);

    log.info(clf.getModel().conf().toYaml());
    Assert.assertTrue("Performance was < " + minPerfomance + ". TestPctCorrect: " + testPctCorrect
        + ", TrainPctCorrect: " + trainPctCorrect, success);
}
Example 6
Source File: AllPairsTable.java From AILibs with GNU Affero General Public License v3.0
public AllPairsTable(final Instances training, final Instances validation, final Classifier c) throws Exception {
    Collection<String> classes = WekaUtil.getClassesActuallyContainedInDataset(training);
    for (Collection<String> set : SetUtil.getAllPossibleSubsetsWithSize(classes, 2)) {
        List<String> pair = set.stream().sorted().collect(Collectors.toList());
        String a = pair.get(0);
        String b = pair.get(1);
        Instances trainingData = WekaUtil.getInstancesOfClass(training, a);
        trainingData.addAll(WekaUtil.getInstancesOfClass(training, b));
        c.buildClassifier(trainingData);

        Instances validationData = WekaUtil.getInstancesOfClass(validation, a);
        validationData.addAll(WekaUtil.getInstancesOfClass(validation, b));
        Evaluation eval = new Evaluation(trainingData);
        eval.evaluateModel(c, validationData);

        if (!this.separabilities.containsKey(a)) {
            this.separabilities.put(a, new HashMap<>());
        }
        this.separabilities.get(a).put(b, eval.pctCorrect() / 100);
    }
    this.classCount = WekaUtil.getNumberOfInstancesPerClass(training);
    this.sum = training.size();
}
Example 7
Source File: EvaluationUtils.java From AILibs with GNU Affero General Public License v3.0
public static double evaluateMLPlan(final int timeout, final Instances training, final Instances test,
        final int seed, final Logger logger, final int numCores) throws Exception {

    logger.debug("Starting ML-Plan execution. Training on {} instances with {} attributes.",
            training.numInstances(), training.numAttributes());

    /* Initialize MLPlan using WEKA components */
    MLPlanWekaBuilder builder = AbstractMLPlanBuilder.forWeka();
    builder.withTimeOut(new Timeout(timeout, TimeUnit.SECONDS));
    builder.withNumCpus(numCores);
    builder.withDataset(training);
    MLPlan mlplan = builder.build();
    mlplan.setRandomSeed(seed);

    Classifier clf = mlplan.call();

    if (mlplan.getSelectedClassifier() == null
            || ((MLPipeline) mlplan.getSelectedClassifier()).getBaseClassifier() == null) {
        logger.warn("Could not find a model using ML-Plan. Returning -1...");
        return -1;
    }

    String solutionString = ((MLPipeline) mlplan.getSelectedClassifier()).getBaseClassifier().getClass().getName()
            + " | " + ((MLPipeline) mlplan.getSelectedClassifier()).getPreprocessors();
    logger.debug("Selected classifier: {}", solutionString);

    /* evaluate solution produced by mlplan */
    Evaluation eval = new Evaluation(training);
    eval.evaluateModel(clf, test);
    return eval.pctCorrect();
}
Example 8
Source File: EvaluationUtils.java From AILibs with GNU Affero General Public License v3.0
public static double performEnsemble(Instances instances) throws Exception {
    List<Instances> subsample = WekaUtil.getStratifiedSplit(instances, 42, .05f);
    instances = subsample.get(0);

    /* Relief */
    ReliefFAttributeEval relief = new ReliefFAttributeEval();
    relief.buildEvaluator(instances);
    double attEvalSum = 0;
    for (int i = 0; i < instances.numAttributes() - 1; i++) {
        attEvalSum += relief.evaluateAttribute(i);
    }
    attEvalSum /= instances.numAttributes();

    /* Variance */
    double varianceMean = 0;
    int totalNumericCount = 0;
    for (int i = 0; i < instances.numAttributes() - 1; i++) {
        if (instances.attribute(i).isNumeric()) {
            instances.attributeStats(i).numericStats.calculateDerived();
            varianceMean += Math.pow(instances.attributeStats(i).numericStats.stdDev, 2);
            totalNumericCount++;
        }
    }
    varianceMean /= (totalNumericCount != 0 ? totalNumericCount : 1);

    /* KNN */
    List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, .7f);
    IBk knn = new IBk(10);
    knn.buildClassifier(split.get(0));
    Evaluation eval = new Evaluation(split.get(0));
    eval.evaluateModel(knn, split.get(1));
    double knnResult = eval.pctCorrect() / 100d;

    return 1 - (0.33 * attEvalSum + 0.33 * knnResult + 0.33 * varianceMean);
}
Example 9
Source File: Util.java From AILibs with GNU Affero General Public License v3.0
public static List<Map<String, Object>> conductSingleOneStepReductionExperiment(final ReductionExperiment experiment) throws Exception {
    /* load data */
    Instances data = new Instances(new BufferedReader(new FileReader(experiment.getDataset())));
    data.setClassIndex(data.numAttributes() - 1);

    /* prepare basis for experiments */
    int seed = experiment.getSeed();
    Classifier classifierForRPNDSplit = AbstractClassifier.forName(experiment.getNameOfInnerClassifier(), null);
    Classifier leftClassifier = AbstractClassifier.forName(experiment.getNameOfLeftClassifier(), null);
    Classifier innerClassifier = AbstractClassifier.forName(experiment.getNameOfInnerClassifier(), null);
    Classifier rightClassifier = AbstractClassifier.forName(experiment.getNameOfRightClassifier(), null);

    RPNDSplitter splitter = new RPNDSplitter(new Random(seed), classifierForRPNDSplit);

    /* conduct experiments */
    List<Map<String, Object>> results = new ArrayList<>();
    for (int k = 0; k < 10; k++) {
        List<Collection<String>> classSplit;
        try {
            classSplit = new ArrayList<>(splitter.split(data));
        } catch (Exception e) {
            throw new RuntimeException("Could not create RPND split.", e);
        }
        MCTreeNodeReD classifier = new MCTreeNodeReD(innerClassifier, classSplit.get(0), leftClassifier, classSplit.get(1), rightClassifier);
        long start = System.currentTimeMillis();
        Map<String, Object> result = new HashMap<>();
        List<Instances> dataSplit = WekaUtil.getStratifiedSplit(data, (seed + k), .7);
        classifier.buildClassifier(dataSplit.get(0));
        long time = System.currentTimeMillis() - start;
        Evaluation eval = new Evaluation(dataSplit.get(0));
        eval.evaluateModel(classifier, dataSplit.get(1));
        double loss = (100 - eval.pctCorrect()) / 100f;
        logger.info("Conducted experiment {} with split {}/{}. Loss: {}. Time: {}ms.",
                k, classSplit.get(0), classSplit.get(1), loss, time);
        result.put("errorRate", loss);
        result.put(LABEL_TRAIN_TIME, time);
        results.add(result);
    }
    return results;
}
Example 10
Source File: WekaDeeplearning4jExamples.java From wekaDeeplearning4j with GNU General Public License v3.0
private static void dl4jResnet50() throws Exception {
    String folderPath = "src/test/resources/nominal/plant-seedlings-small";
    ImageDirectoryLoader loader = new ImageDirectoryLoader();
    loader.setInputDirectory(new File(folderPath));
    Instances inst = loader.getDataSet();
    inst.setClassIndex(1);

    Dl4jMlpClassifier classifier = new Dl4jMlpClassifier();
    classifier.setNumEpochs(3);

    KerasEfficientNet kerasEfficientNet = new KerasEfficientNet();
    kerasEfficientNet.setVariation(EfficientNet.VARIATION.EFFICIENTNET_B1);
    classifier.setZooModel(kerasEfficientNet);

    ImageInstanceIterator iterator = new ImageInstanceIterator();
    iterator.setImagesLocation(new File(folderPath));
    classifier.setInstanceIterator(iterator);

    // Stratify and split the data
    Random rand = new Random(0);
    inst.randomize(rand);
    inst.stratify(5);
    Instances train = inst.trainCV(5, 0);
    Instances test = inst.testCV(5, 0);

    // Build the classifier on the training data
    classifier.buildClassifier(train);

    // Evaluate the model on test data
    Evaluation eval = new Evaluation(test);
    eval.evaluateModel(classifier, test);

    // Output some summary statistics
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
}
Example 11
Source File: Main-SVG.java From Java-for-Data-Science with MIT License
public Main() {
    try {
        BufferedReader datafile;
        datafile = readDataFile("camping.txt");
        Instances data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);

        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);
        Evaluation evaluation = new Evaluation(trainingData);

        SMO smo = new SMO();
        smo.buildClassifier(data);

        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Test instance
        Instance instance = new DenseInstance(3);
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);

        instance.setDataset(data);
        System.out.println("The instance: " + instance);
        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
Example 12
Source File: LDAEvaluationTest.java From AILibs with GNU Affero General Public License v3.0
@Test
public void evaluateTest() throws Exception {
    logger.info("Starting LDA evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);
    Instances data = new Instances(new BufferedReader(new FileReader(file)));
    data.setClassIndex(data.numAttributes() - 1);
    List<Instances> dataSplit = WekaUtil.getStratifiedSplit(data, 42, .05f);
    Instances insts = dataSplit.get(0);
    List<Instances> split = WekaUtil.getStratifiedSplit(insts, 42, .7f);

    long timeStart = System.currentTimeMillis();

    LDA lda = new LDA();
    lda.buildClassifier(split.get(0));

    long timeStartEval = System.currentTimeMillis();

    Evaluation eval = new Evaluation(split.get(0));
    eval.evaluateModel(lda, split.get(1));
    logger.debug("LDA pct correct: " + eval.pctCorrect());
    Assert.assertTrue(eval.pctCorrect() > 0);

    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.debug("LDA took " + (timeTaken / 1000) + " s.");
    logger.debug("LDA eval took " + (timeTakenEval / 1000) + " s.");
}
Example 13
Source File: DecisionTreeEstimator.java From jMetal with MIT License
public double doPrediction(int index, S testSolution) {
    double result = 0.0d;
    try {
        int numberOfObjectives = solutionList.get(0).getNumberOfObjectives();

        // Attributes
        // numeric
        Attribute attr = new Attribute("my-numeric");

        // nominal
        ArrayList<String> myNomVals = new ArrayList<>();
        for (int i = 0; i < numberOfObjectives; i++)
            myNomVals.add(VALUE_STRING + i);
        Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals);
        // System.out.println(attr1.isNominal());

        // string
        Attribute attr2 = new Attribute(MY_STRING, (List<String>) null);
        // System.out.println(attr2.isString());

        // 2. create dataset
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(attr);
        attrs.add(attr1);
        attrs.add(attr2);
        Instances dataset = new Instances("my_dataset", attrs, 0);

        // Add instances
        for (S solution : solutionList) {
            // one instance per objective of each solution
            for (int i = 0; i < numberOfObjectives; i++) {
                double[] attValues = new double[dataset.numAttributes()];
                attValues[0] = solution.getObjective(i);
                attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING + i);
                attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString() + i);
                dataset.add(new DenseInstance(1.0, attValues));
            }
        }

        // DataSet test
        Instances datasetTest = new Instances("my_dataset_test", attrs, 0);
        // Add instances
        for (int i = 0; i < numberOfObjectives; i++) {
            Instance test = new DenseInstance(3);
            test.setValue(attr, testSolution.getObjective(i));
            test.setValue(attr1, VALUE_STRING + i);
            test.setValue(attr2, testSolution.toString() + i);
            datasetTest.add(test);
            // dataset.add(test);
        }

        // Preprocess strings (almost no classifier supports them)
        StringToWordVector filter = new StringToWordVector();
        filter.setInputFormat(dataset);
        dataset = Filter.useFilter(dataset, filter);

        // Build classifier
        dataset.setClassIndex(1);
        Classifier classifier = new J48();
        classifier.buildClassifier(dataset);

        // resample if needed
        // dataset = dataset.resample(new Random(42));
        dataset.setClassIndex(1);
        datasetTest.setClassIndex(1);

        // do eval
        Evaluation eval = new Evaluation(datasetTest); // trainset
        eval.evaluateModel(classifier, datasetTest);   // testset
        result = classifier.classifyInstance(datasetTest.get(index));
    } catch (Exception e) {
        result = testSolution.getObjective(index);
    }
    return result;
}
Example 14
Source File: ModelEvaluation.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-challenge.arff");
        Instances dt = src.getDataSet();
        dt.setClassIndex(dt.numAttributes() - 1);

        String[] options = new String[4];
        options[0] = "-C";
        options[1] = "0.1";
        options[2] = "-M";
        options[3] = "2";
        J48 mytree = new J48();
        mytree.setOptions(options);
        mytree.buildClassifier(dt);

        Evaluation eval = new Evaluation(dt);
        Random rand = new Random(1);

        DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-test.arff");
        Instances tdt = src1.getDataSet();
        tdt.setClassIndex(tdt.numAttributes() - 1);

        eval.evaluateModel(mytree, tdt);
        System.out.println(eval.toSummaryString("Evaluation results:\n", false));

        System.out.println("Correct % = " + eval.pctCorrect());
        System.out.println("Incorrect % = " + eval.pctIncorrect());
        System.out.println("kappa = " + eval.kappa());
        System.out.println("MAE = " + eval.meanAbsoluteError());
        System.out.println("RMSE = " + eval.rootMeanSquaredError());
        System.out.println("RAE = " + eval.relativeAbsoluteError());
        System.out.println("Precision = " + eval.precision(1));
        System.out.println("Recall = " + eval.recall(1));
        System.out.println("fMeasure = " + eval.fMeasure(1));
        System.out.println(eval.toMatrixString("=== Overall Confusion Matrix ==="));
    } catch (Exception e) {
        System.out.println("Error!!!!\n" + e.getMessage());
    }
}
Example 15
Source File: IsotonicRegression.java From tsml with GNU General Public License v3.0
/**
 * Does the actual regression.
 */
protected void regress(Attribute attribute, Instances insts, boolean ascending)
    throws Exception {

    // Sort values according to current attribute
    insts.sort(attribute);

    // Initialize arrays
    double[] values = new double[insts.numInstances()];
    double[] weights = new double[insts.numInstances()];
    double[] cuts = new double[insts.numInstances() - 1];
    int size = 0;
    values[0] = insts.instance(0).classValue();
    weights[0] = insts.instance(0).weight();
    for (int i = 1; i < insts.numInstances(); i++) {
        if (insts.instance(i).value(attribute) > insts.instance(i - 1).value(attribute)) {
            cuts[size] = (insts.instance(i).value(attribute) + insts.instance(i - 1).value(attribute)) / 2;
            size++;
        }
        values[size] += insts.instance(i).classValue();
        weights[size] += insts.instance(i).weight();
    }
    size++;

    // While there is a pair of adjacent violators
    boolean violators;
    do {
        violators = false;

        // Initialize arrays
        double[] tempValues = new double[size];
        double[] tempWeights = new double[size];
        double[] tempCuts = new double[size - 1];

        // Merge adjacent violators
        int newSize = 0;
        tempValues[0] = values[0];
        tempWeights[0] = weights[0];
        for (int j = 1; j < size; j++) {
            if ((ascending && (values[j] / weights[j] > tempValues[newSize] / tempWeights[newSize]))
                || (!ascending && (values[j] / weights[j] < tempValues[newSize] / tempWeights[newSize]))) {
                tempCuts[newSize] = cuts[j - 1];
                newSize++;
                tempValues[newSize] = values[j];
                tempWeights[newSize] = weights[j];
            } else {
                tempWeights[newSize] += weights[j];
                tempValues[newSize] += values[j];
                violators = true;
            }
        }
        newSize++;

        // Copy references
        values = tempValues;
        weights = tempWeights;
        cuts = tempCuts;
        size = newSize;
    } while (violators);

    // Compute actual predictions
    for (int i = 0; i < size; i++) {
        values[i] /= weights[i];
    }

    // Backup best instance variables
    Attribute attributeBackedup = m_attribute;
    double[] cutsBackedup = m_cuts;
    double[] valuesBackedup = m_values;

    // Set instance variables to values computed for this attribute
    m_attribute = attribute;
    m_cuts = cuts;
    m_values = values;

    // Compute root mean squared error
    Evaluation eval = new Evaluation(insts);
    eval.evaluateModel(this, insts);
    double msq = eval.rootMeanSquaredError();

    // Check whether this is the best attribute
    if (msq < m_minMsq) {
        m_minMsq = msq;
    } else {
        m_attribute = attributeBackedup;
        m_cuts = cutsBackedup;
        m_values = valuesBackedup;
    }
}
Example 16
Source File: DecisionTreeEstimator.java From jMetal with MIT License
public double doPredictionVariable(int index, S testSolution) {
    double result = 0.0d;
    try {
        int numberOfVariables = solutionList.get(0).getNumberOfVariables();

        // Attributes
        // numeric
        Attribute attr = new Attribute("my-numeric");

        // nominal
        ArrayList<String> myNomVals = new ArrayList<>();
        for (int i = 0; i < numberOfVariables; i++)
            myNomVals.add(VALUE_STRING + i);
        Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals);

        // string
        Attribute attr2 = new Attribute(MY_STRING, (List<String>) null);

        // 2. create dataset
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(attr);
        attrs.add(attr1);
        attrs.add(attr2);
        Instances dataset = new Instances("my_dataset", attrs, 0);

        // Add instances
        for (S solution : solutionList) {
            // one instance per variable of each solution
            for (int i = 0; i < numberOfVariables; i++) {
                double[] attValues = new double[dataset.numAttributes()];
                attValues[0] = ((DoubleSolution) solution).getVariable(i);
                attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING + i);
                attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString() + i);
                dataset.add(new DenseInstance(1.0, attValues));
            }
        }

        // DataSet test
        Instances datasetTest = new Instances("my_dataset_test", attrs, 0);
        // Add instances
        for (int i = 0; i < numberOfVariables; i++) {
            Instance test = new DenseInstance(3);
            test.setValue(attr, ((DoubleSolution) testSolution).getVariable(i));
            test.setValue(attr1, VALUE_STRING + i);
            test.setValue(attr2, testSolution.toString() + i);
            datasetTest.add(test);
            // dataset.add(test);
        }

        // Preprocess strings (almost no classifier supports them)
        StringToWordVector filter = new StringToWordVector();
        filter.setInputFormat(dataset);
        dataset = Filter.useFilter(dataset, filter);

        // Build classifier
        dataset.setClassIndex(1);
        Classifier classifier = new J48();
        classifier.buildClassifier(dataset);

        // resample if needed
        // dataset = dataset.resample(new Random(42));
        dataset.setClassIndex(1);
        datasetTest.setClassIndex(1);

        // do eval
        Evaluation eval = new Evaluation(datasetTest); // trainset
        eval.evaluateModel(classifier, datasetTest);   // testset
        result = classifier.classifyInstance(datasetTest.get(index));
    } catch (Exception e) {
        result = ((DoubleSolution) testSolution).getVariable(index);
    }
    return result;
}
Example 17
Source File: CVParameterSelection.java From tsml with GNU General Public License v3.0
/**
 * Finds the best parameter combination. (recursive for each parameter
 * being optimised).
 *
 * @param depth the index of the parameter to be optimised at this level
 * @param trainData the data the search is based on
 * @param random a random number generator
 * @throws Exception if an error occurs
 */
protected void findParamsByCrossValidation(int depth, Instances trainData, Random random)
    throws Exception {

    if (depth < m_CVParams.size()) {
        CVParameter cvParam = (CVParameter) m_CVParams.elementAt(depth);

        double upper;
        switch ((int) (cvParam.m_Lower - cvParam.m_Upper + 0.5)) {
            case 1:
                upper = m_NumAttributes;
                break;
            case 2:
                upper = m_TrainFoldSize;
                break;
            default:
                upper = cvParam.m_Upper;
                break;
        }
        double increment = (upper - cvParam.m_Lower) / (cvParam.m_Steps - 1);
        for (cvParam.m_ParamValue = cvParam.m_Lower;
             cvParam.m_ParamValue <= upper;
             cvParam.m_ParamValue += increment) {
            findParamsByCrossValidation(depth + 1, trainData, random);
        }
    } else {
        Evaluation evaluation = new Evaluation(trainData);

        // Set the classifier options
        String[] options = createOptions();
        if (m_Debug) {
            System.err.print("Setting options for " + m_Classifier.getClass().getName() + ":");
            for (int i = 0; i < options.length; i++) {
                System.err.print(" " + options[i]);
            }
            System.err.println("");
        }
        ((OptionHandler) m_Classifier).setOptions(options);
        for (int j = 0; j < m_NumFolds; j++) {
            // We want to randomize the data the same way for every
            // learning scheme.
            Instances train = trainData.trainCV(m_NumFolds, j, new Random(1));
            Instances test = trainData.testCV(m_NumFolds, j);
            m_Classifier.buildClassifier(train);
            evaluation.setPriors(train);
            evaluation.evaluateModel(m_Classifier, test);
        }
        double error = evaluation.errorRate();
        if (m_Debug) {
            System.err.println("Cross-validated error rate: " + Utils.doubleToString(error, 6, 4));
        }
        if ((m_BestPerformance == -99) || (error < m_BestPerformance)) {
            m_BestPerformance = error;
            m_BestClassifierOptions = createOptions();
        }
    }
}
Example 18
Source File: WekaEmailIntentClassifier.java From EmailIntentDataSet with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage: WekaSpeechActClassifier <train_set_input_file> <test_set_input_file>");
        System.exit(0);
    }
    String arffFileTrain = args[0];
    String arffFileTest = args[1];

    LibSVM wekaClassifier = new LibSVM();
    wekaClassifier.setOptions(new String[] {"-B", "-H"});

    Instances preparedData = (Instances) SerializationHelper.read(arffFileTrain);
    Instances preparedTest = (Instances) SerializationHelper.read(arffFileTest);
    System.out.println("Reading train set and test set done!");

    System.out.print("\nTraining...");
    wekaClassifier.buildClassifier(preparedData);
    System.out.println("\nTraining...done!");

    Evaluation evalTrain = new Evaluation(preparedData);
    evalTrain.evaluateModel(wekaClassifier, preparedData);
    DecimalFormat formatter = new DecimalFormat("#0.0");
    System.out.println("\nEvaluating on trainSet...");
    System.out.println(evalTrain.toSummaryString());
    System.out.println("\nResult on trainSet...");
    // Note: all three metrics are scaled by 100 before printing with "%"
    System.out.println("Precision: " + formatter.format(100 * evalTrain.precision(0)) + "%"
        + " - Recall: " + formatter.format(100 * evalTrain.recall(0)) + "%"
        + " - F1: " + formatter.format(100 * evalTrain.fMeasure(0)) + "%");

    Evaluation eval = new Evaluation(preparedTest);
    eval.evaluateModel(wekaClassifier, preparedTest);
    System.out.println("\nEvaluating on testSet...");
    System.out.println(eval.toSummaryString());
    System.out.println("\nResult on testSet...");
    System.out.println("Precision: " + formatter.format(100 * eval.precision(0)) + "%"
        + " - Recall: " + formatter.format(100 * eval.recall(0)) + "%"
        + " - F1: " + formatter.format(100 * eval.fMeasure(0)) + "%");
    System.out.println("True positive rate: " + formatter.format(100 * eval.truePositiveRate(0)) + "%"
        + " - True negative rate: " + formatter.format(100 * eval.trueNegativeRate(0)) + "%");
    System.out.println("Accuracy: "
        + formatter.format(100 * ((eval.truePositiveRate(0) + eval.trueNegativeRate(0)) / 2)) + "%");
    System.out.println("\nDone!");
}
Example 19
Source File: UCR_Trillion.java From tsml with GNU General Public License v3.0
/**
 * Running the experiment for a single dataset
 * @param datasetName
 * @throws Exception
 */
private static void singleProblem(String datasetName) throws Exception {
    // Setting output directory
    resDir = projectPath + "outputs/Benchmark/" + datasetName + "/";

    // Check if it exists, else create the directory
    File dir = new File(resDir);
    if (!dir.exists())
        dir.mkdirs();

    // Reading the dataset
    System.out.println("Processing: " + datasetName);
    Instances[] data = ExperimentsLauncher.readTrainAndTest(datasetPath, datasetName);
    Instances train = data[0];
    Instances test = data[1];

    // Go through different runs and randomize the dataset
    for (int i = 0; i < nbRuns; i++) {
        // Sampling the dataset
        train = Sampling.random(train);

        // Initialising the classifier
        System.out.println("Run " + i + ", Launching " + method);
        Trillion classifier = new Trillion(datasetName);
        classifier.setResDir(resDir);
        classifier.setType(method);

        // Training the classifier for best window
        long start = System.nanoTime();
        classifier.buildClassifier(train);
        long stop = System.nanoTime();
        double searchTime = (stop - start) / 1e9;
        System.out.println(searchTime + " s");

        bestWarpingWindow = classifier.getBestWin();
        bestScore = classifier.getBestScore();

        // Evaluate the trained classifier with the test set
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(classifier, test);
        System.out.println(eval.errorRate());

        // Save result
        saveSearchTime(searchTime, eval.errorRate());
    }
}
Example 20
Source File: UCR_UCRSuitePrunedDTW.java From tsml with GNU General Public License v3.0
/**
 * Running the experiment for a single dataset
 * @param datasetName
 * @throws Exception
 */
private static void singleProblem(String datasetName) throws Exception {
    // Setting output directory
    resDir = projectPath + "outputs/Benchmark/" + datasetName + "/";

    // Check if it exists, else create the directory
    File dir = new File(resDir);
    if (!dir.exists())
        dir.mkdirs();

    // Reading the dataset
    System.out.println("Processing: " + datasetName);
    Instances[] data = ExperimentsLauncher.readTrainAndTest(datasetPath, datasetName);
    Instances train = data[0];
    Instances test = data[1];

    // Go through different runs and randomize the dataset
    for (int i = 0; i < nbRuns; i++) {
        // Sampling the dataset
        train = Sampling.random(train);

        // Initialising the classifier
        System.out.println("Run " + i + ", Launching " + method);
        UCRSuitePrunedDTW classifier = new UCRSuitePrunedDTW(datasetName);
        classifier.setResDir(resDir);
        classifier.setType(method);

        // Training the classifier for best window
        long start = System.nanoTime();
        classifier.buildClassifier(train);
        long stop = System.nanoTime();
        double searchTime = (stop - start) / 1e9;
        System.out.println(searchTime + " s");

        bestWarpingWindow = classifier.getBestWin();
        bestScore = classifier.getBestScore();

        // Evaluate the trained classifier with the test set
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(classifier, test);
        System.out.println(eval.errorRate());

        // Save result
        saveSearchTime(searchTime, eval.errorRate());
    }
}