weka.classifiers.trees.RandomForest Java Examples

Source File: MultivariateShapeletTransformClassifier.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Configures the existing {@code ensemble} with a {@code TrainAcc(4)} weighting
 * scheme, a {@code MajorityConfidence} voting scheme and three base classifiers:
 * an SMO with a degree-2 polynomial kernel ("SVMQ"), a 500-tree random forest
 * ("RandF") and a 100-iteration rotation forest ("RotF").
 *
 * The base classifiers are only seeded when {@code seedClassifier} is set.
 */
public void configureEnsemble() {
    ensemble.setWeightingScheme(new TrainAcc(4));
    ensemble.setVotingScheme(new MajorityConfidence());

    // Quadratic-kernel support vector machine with logistic models enabled
    PolyKernel quadraticKernel = new PolyKernel();
    quadraticKernel.setExponent(2);
    SMO svmq = new SMO();
    svmq.turnChecksOff();
    svmq.setBuildLogisticModels(true);
    svmq.setKernel(quadraticKernel);

    RandomForest randF = new RandomForest();
    randF.setNumTrees(500);

    RotationForest rotF = new RotationForest();
    rotF.setNumIterations(100);

    // Seed every member from the same value, but only on request
    if (seedClassifier) {
        int classifierSeed = (int) seed;
        svmq.setRandomSeed(classifierSeed);
        randF.setSeed(classifierSeed);
        rotF.setSeed(classifierSeed);
    }

    Classifier[] classifiers = { svmq, randF, rotF };
    String[] classifierNames = { "SVMQ", "RandF", "RotF" };
    ensemble.setClassifiers(classifiers, classifierNames, null);
}

Source File: TunedRandomForest.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Prints the test accuracy of a RandomForest on MNIST, first with the
 * classifier's default configuration and then with 50, 100, ..., 1000 trees.
 *
 * The accuracy is measured directly on the test split for every setting.
 * NOTE(review): the first line always reports "10" trees; presumably that is
 * the default tree count of the Weka version in use - confirm.
 */
public static void cheatOnMNIST() {
    Instances train = DatasetLoading.loadDataNullable("\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\LargeProblems\\MNIST\\MNIST_TRAIN");
    Instances test = DatasetLoading.loadDataNullable("\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\LargeProblems\\MNIST\\MNIST_TEST");
    RandomForest forest = new RandomForest();
    System.out.println("Data loaded ......");

    // Baseline run with the untouched classifier
    double accuracy = ClassifierTools.singleTrainTestSplitAccuracy(forest, train, test);
    System.out.println("Trees =" + 10 + " acc = " + accuracy);

    // Sweep the forest size in steps of 50
    int numTrees = 50;
    while (numTrees <= 1000) {
        forest.setNumTrees(numTrees);
        accuracy = ClassifierTools.singleTrainTestSplitAccuracy(forest, train, test);
        System.out.println("Trees =" + numTrees + " acc = " + accuracy);
        numTrees += 50;
    }
}

Source File: CAWPE.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Applies the "CAWPE-A" configuration to this ensemble: {@code TrainAcc(4)}
 * weighting, {@code MajorityConfidence} voting, a 10-fold
 * {@code CrossValidationEvaluator} as train estimator, and three members:
 * a degree-2 polynomial SMO ("SVMQ"), a 500-tree random forest ("RandF") and
 * a 200-iteration rotation forest ("RotF").
 *
 * Only the SMO is seeded here; the two forests are left unseeded by this method.
 */
public final void setupAdvancedSettings() {
    this.ensembleName = "CAWPE-A";
    this.weightingScheme = new TrainAcc(4);
    this.votingScheme = new MajorityConfidence();

    CrossValidationEvaluator tenFoldCv = new CrossValidationEvaluator(seed, false, false, false, false);
    tenFoldCv.setNumFolds(10);
    this.trainEstimator = tenFoldCv;

    // Quadratic-kernel support vector machine with logistic models enabled
    PolyKernel quadraticKernel = new PolyKernel();
    quadraticKernel.setExponent(2);
    SMO svmq = new SMO();
    svmq.turnChecksOff();
    svmq.setBuildLogisticModels(true);
    svmq.setKernel(quadraticKernel);
    svmq.setRandomSeed(seed);

    RandomForest randF = new RandomForest();
    randF.setNumTrees(500);

    RotationForest rotF = new RotationForest();
    rotF.setNumIterations(200);

    Classifier[] classifiers = { svmq, randF, rotF };
    String[] classifierNames = { "SVMQ", "RandF", "RotF" };
    setClassifiers(classifiers, classifierNames, null);
}

Source File: RelExTool.java From Criteria2Query with Apache License 2.0

5 votes

/**
 * Trains a RandomForest on an ARFF file and hands the trained model to
 * {@code saveModel}.
 *
 * @param trainfile path of the ARFF file holding the training instances
 * @param modelpath destination passed on to {@code saveModel}
 * @throws Exception if loading the data, training or saving fails
 */
public void trainClassifier(String trainfile,String modelpath) throws Exception{
	Classifier forest = new RandomForest();

	ArffLoader arffLoader = new ArffLoader();
	arffLoader.setFile(new File(trainfile));
	Instances trainingData = arffLoader.getDataSet();
	// The attribute at index 6 is used as the class attribute
	trainingData.setClassIndex(6);

	forest.buildClassifier(trainingData);
	saveModel(forest, modelpath);
}

Source File: WekaFilteredClassifierTest.java From Java-Data-Science-Cookbook with MIT License

5 votes

/**
 * Trains a RandomForest wrapped in a {@code FilteredClassifier} on the
 * {@code weather} data and prints, for every instance, the given class label
 * next to the predicted one.
 *
 * The {@code Remove} filter is configured with attribute indices "1", and the
 * newly created forest is stored in the {@code rf} field. Failures while
 * building or classifying are reported on standard error instead of being
 * silently dropped; the method itself still does not throw.
 */
public void buildFilteredClassifier(){
	rf = new RandomForest();
	Remove rm = new Remove();
	rm.setAttributeIndices("1");
	FilteredClassifier fc = new FilteredClassifier();
	fc.setFilter(rm);
	fc.setClassifier(rf);
	try{
		fc.buildClassifier(weather);
		for (int i = 0; i < weather.numInstances(); i++){
			double pred = fc.classifyInstance(weather.instance(i));
			System.out.print("given value: " + weather.classAttribute().value((int) weather.instance(i).classValue()));
			System.out.println("---predicted value: " + weather.classAttribute().value((int) pred));
		}
	} catch (Exception e) {
		// Previously swallowed: surface the failure so an untrained or broken model does not go unnoticed
		System.err.println("Building or applying the filtered classifier failed: " + e);
		e.printStackTrace();
	}
}

Source File: AutoFEWekaPipelineTest.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Checks that cloning an {@code AutoFEWekaPipeline} (a one-filter pipeline
 * plus a RandomForest) through {@code WekaUtil.cloneClassifier} yields a
 * non-null classifier.
 */
@Test
public void testAutoFEWekaPipelineClone() throws Exception {
	// The filter graph consists of a single pretrained "VGG16" filter
	Graph<IFilter> filterGraph = new Graph<>();
	filterGraph.addItem(ImageUtils.getPretrainedNNFilterByName("VGG16", 5, DataSetUtilsTest.CIFAR10_INPUT_SHAPE));

	AutoFEWekaPipeline original = new AutoFEWekaPipeline(new FilterPipeline(null, filterGraph), new RandomForest());

	Classifier copy = WekaUtil.cloneClassifier(original);
	Assert.assertNotNull(copy);
}

Source File: TimeSeriesBagOfFeaturesLearningAlgorithm.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Derives class probabilities for the input data by cross validation. For
 * every one of the <code>numFolds</code> folds the data is split into a
 * training and a test part, <code>rf</code> is retrained on the training part
 * and the probabilities it predicts for the test part are stored in the
 * result.
 *
 * The rows of fold <code>i</code> are written starting at index
 * <code>i * (numProbInstances / numFolds)</code> (truncated), one row per test
 * instance of that fold.
 *
 * @param subSeqValueMatrix
 *            Input data that is split into folds
 * @param targetMatrix
 *            The target values belonging to the input data
 * @param numProbInstances
 *            Number of rows of the returned probability matrix
 * @param numFolds
 *            Number of cross validation folds
 * @param numClasses
 *            Number of classes, i.e. number of columns of the returned matrix;
 *            the class labels handed to Weka are "0" .. "numClasses - 1"
 * @param rf
 *            Random Forest classifier that is retrained in every fold
 * @return Matrix holding the predicted class probabilities per instance
 * @throws TrainingException
 *             Thrown when the test probabilities of a fold could not be
 *             computed; the signature also lets failures of the per-fold
 *             training call propagate
 */
public static double[][] measureOOBProbabilitiesUsingCV(final double[][] subSeqValueMatrix, final int[] targetMatrix, final int numProbInstances, final int numFolds, final int numClasses, final RandomForest rf)
		throws TrainingException {

	final double[][] oobProbabilities = new double[numProbInstances][numClasses];
	// Size of the result block reserved for each fold (fraction truncated)
	final int foldBlockSize = (int) (oobProbabilities.length / (double) numFolds);

	for (int fold = 0; fold < numFolds; fold++) {
		final Pair<TimeSeriesDataset2, TimeSeriesDataset2> split = TimeSeriesUtil.getTrainingAndTestDataForFold(fold, numFolds, subSeqValueMatrix, targetMatrix);

		// Retrain the forest on the training part (X component) of this fold
		WekaTimeseriesUtil.buildWekaClassifierFromSimplifiedTS(rf, split.getX());

		// Convert the test part (Y component) into Weka instances
		final Instances foldTestInstances = WekaTimeseriesUtil.simplifiedTimeSeriesDatasetToWekaInstances(split.getY(),
				IntStream.rangeClosed(0, numClasses - 1).boxed().map(String::valueOf).collect(Collectors.toList()));

		final double[][] foldProbabilities;
		try {
			foldProbabilities = rf.distributionsForInstances(foldTestInstances);
		} catch (Exception e) {
			throw new TrainingException("Could not induce test probabilities in OOB probability estimation due to an internal Weka error.", e);
		}

		// Copy the fold's rows into its block of the overall result
		final int blockStart = fold * foldBlockSize;
		for (int row = 0; row < foldProbabilities.length; row++) {
			oobProbabilities[blockStart + row] = foldProbabilities[row];
		}
	}

	return oobProbabilities;
}

Source File: MultivariateShapeletTransformClassifier.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Creates a new CAWPE {@code ensemble} with the seven classifiers used in the
 * HIVE COTE paper: SVMQ, RandF, RotF, NN, NB, C45 and SVML.
 *
 * SVMQ is an SMO with a degree-2 polynomial kernel and SVML an SMO with a
 * degree-1 polynomial kernel; each SMO receives its own kernel instance.
 * SVMQ, RandF and RotF are seeded only when {@code seedClassifier} is set;
 * the remaining members are not seeded by this method.
 */
    public void configureDefaultEnsemble(){
//HIVE_SHAPELET_SVMQ    HIVE_SHAPELET_RandF    HIVE_SHAPELET_RotF    
//HIVE_SHAPELET_NN    HIVE_SHAPELET_NB    HIVE_SHAPELET_C45    HIVE_SHAPELET_SVML   
        ensemble=new CAWPE();
        ensemble.setWeightingScheme(new TrainAcc(4));
        ensemble.setVotingScheme(new MajorityConfidence());
        Classifier[] classifiers = new Classifier[7];
        String[] classifierNames = new String[7];
        
        SMO smo = new SMO();
        smo.turnChecksOff();
        smo.setBuildLogisticModels(true);
        PolyKernel kl = new PolyKernel();
        kl.setExponent(2);
        smo.setKernel(kl);
        if (seedClassifier)
            smo.setRandomSeed((int)seed);
        classifiers[0] = smo;
        classifierNames[0] = "SVMQ";

        RandomForest r=new RandomForest();
        r.setNumTrees(500);
        if(seedClassifier)
           r.setSeed((int)seed);            
        classifiers[1] = r;
        classifierNames[1] = "RandF";
            
            
        RotationForest rf=new RotationForest();
        rf.setNumIterations(100);
        if(seedClassifier)
           rf.setSeed((int)seed);
        classifiers[2] = rf;
        classifierNames[2] = "RotF";
        IBk nn=new IBk();
        classifiers[3] = nn;
        classifierNames[3] = "NN";
        NaiveBayes nb=new NaiveBayes();
        classifiers[4] = nb;
        classifierNames[4] = "NB";
        J48 c45=new J48();
        classifiers[5] = c45;
        classifierNames[5] = "C45";
        SMO svml = new SMO();
        svml.turnChecksOff();
        svml.setBuildLogisticModels(true);
        PolyKernel k2 = new PolyKernel();
        k2.setExponent(1);
        // The linear kernel belongs to svml; setting it on smo would replace SVMQ's quadratic kernel
        svml.setKernel(k2);
        classifiers[6] = svml;
        classifierNames[6] = "SVML";
        ensemble.setClassifiers(classifiers, classifierNames, null);
    }

Source File: CAWPE.java From tsml with GNU General Public License v3.0

4 votes

/**
     * Applies the original "HESCA" configuration, as used originally in
     * ST_HESCA and COTE.
     *
     * Components: NN, NB, C4.5, SVML, SVMQ, RandF, RotF
     * Weight: TrainAcc
     * Vote: MajorityVote
     *
     * NOTE the original also contained Bayes Net (BN). It has been removed
     * because the classifier crashes unpredictably when discretising features
     * (due to lack of variance in the feature, which is not easily detected
     * and dealt with).
     */
    public final void setupOriginalHESCASettings() {
        this.ensembleName = "HESCA";
        this.weightingScheme = new TrainAcc();
        this.votingScheme = new MajorityVote();

        CrossValidationEvaluator tenFoldCv = new CrossValidationEvaluator(seed, false, false, false, false);
        tenFoldCv.setNumFolds(10);
        this.trainEstimator = tenFoldCv;

        // Nearest neighbour: cross validation on, normalisation off, Euclidean distance
        kNN nearestNeighbour = new kNN(100);
        nearestNeighbour.setCrossValidate(true);
        nearestNeighbour.normalise(false);
        nearestNeighbour.setDistanceFunction(new EuclideanDistance());

        // Linear support vector machine
        PolyKernel linearKernel = new PolyKernel();
        linearKernel.setExponent(1);
        SMO svml = new SMO();
        svml.turnChecksOff();
        svml.setKernel(linearKernel);
        svml.setRandomSeed(seed);

        // Quadratic support vector machine.
        // Assumes no missing, all real valued and a discrete class variable
        PolyKernel quadraticKernel = new PolyKernel();
        quadraticKernel.setExponent(2);
        SMO svmq = new SMO();
        svmq.turnChecksOff();
        svmq.setKernel(quadraticKernel);
        svmq.setRandomSeed(seed);

        RandomForest randF = new RandomForest();
        randF.setNumTrees(500);
        randF.setSeed(seed);

        RotationForest rotF = new RotationForest();
        rotF.setNumIterations(50);
        rotF.setSeed(seed);

        Classifier[] classifiers = {
            nearestNeighbour, new NaiveBayes(), new J48(), svml, svmq, randF, rotF
        };
        String[] classifierNames = {
            "NN", "NB", "C45", "SVML", "SVMQ", "RandF", "RotF"
        };

        setClassifiers(classifiers, classifierNames, null);
    }

Source File: EnsembleProvider.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Builds the HIVE COTE ensemble of 7 classifiers combined by a
 * {@link MajorityConfidenceVote}, as described in J. Lines, S. Taylor and
 * A. Bagnall, "HIVE-COTE: The Hierarchical Vote Collective of
 * Transformation-Based Ensembles for Time Series Classification," 2016 IEEE
 * 16th International Conference on Data Mining (ICDM), Barcelona, 2016,
 * pp. 1041-1046. doi: 10.1109/ICDM.2016.0133.
 *
 * Members, in order: SMO with degree-2 polynomial kernel, RandomForest (500
 * iterations), RotationForest (100 iterations), IBk, NaiveBayes, J48 and SMO
 * with degree-1 polynomial kernel. The seed is applied to the quadratic SMO,
 * both forests and J48; the other members are not seeded here.
 *
 * NOTE(review): the voter is created with a fixed first argument of 5,
 * presumably the number of folds used to determine the classifier weights -
 * confirm against {@link MajorityConfidenceVote}.
 *
 * @param seed
 *            Seed used within the classifiers and the majority confidence
 *            voting scheme (narrowed to <code>int</code> for the classifiers)
 * @return Returns the initialized (but untrained) HIVE COTE ensemble model.
 */
public static Classifier provideHIVECOTEEnsembleModel(final long seed) {
	final int classifierSeed = (int) seed;
	Vote ensembleVoter = new MajorityConfidenceVote(5, seed);

	// SMO with quadratic polynomial kernel
	PolyKernel quadraticKernel = new PolyKernel();
	quadraticKernel.setExponent(2);
	SMO quadraticSmo = new SMO();
	quadraticSmo.turnChecksOff();
	quadraticSmo.setBuildCalibrationModels(true);
	quadraticSmo.setKernel(quadraticKernel);
	quadraticSmo.setRandomSeed(classifierSeed);

	// Random forest
	RandomForest randomForest = new RandomForest();
	randomForest.setSeed(classifierSeed);
	randomForest.setNumIterations(500);

	// Rotation forest
	RotationForest rotationForest = new RotationForest();
	rotationForest.setSeed(classifierSeed);
	rotationForest.setNumIterations(100);

	// C4.5 decision tree
	J48 c45Tree = new J48();
	c45Tree.setSeed(classifierSeed);

	// SMO with linear polynomial kernel
	PolyKernel linearKernel = new PolyKernel();
	linearKernel.setExponent(1);
	SMO linearSmo = new SMO();
	linearSmo.turnChecksOff();
	linearSmo.setBuildCalibrationModels(true);
	linearSmo.setKernel(linearKernel);

	// Nearest neighbour and naive Bayes are used with their defaults
	Classifier[] members = {
		quadraticSmo, randomForest, rotationForest, new IBk(), new NaiveBayes(), c45Tree, linearSmo
	};

	ensembleVoter.setClassifiers(members);
	return ensembleVoter;
}

Source File: TimeSeriesBagOfFeaturesClassifier.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Returns the Random Forest currently held in the <code>subseriesClf</code> field.
 *
 * @return the subseries classifier
 */
public RandomForest getSubseriesClf() {
	return subseriesClf;
}

Source File: TimeSeriesBagOfFeaturesClassifier.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Returns the Random Forest currently held in the <code>finalClf</code> field.
 *
 * @return the final classifier
 */
public RandomForest getFinalClf() {
	return finalClf;
}

Source File: Ex02_Classifiers.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Example walkthrough: trains a RandomForest on the ItalyPowerDemand sample,
 * prints its test accuracy, then builds the same kind of classifier through
 * {@code ClassifierLists}.
 */
public static void main(String[] args) throws Exception {

    // The same data is used throughout, see Ex01_Datahandling
    int seed = 0;
    Instances[] trainTest = DatasetLoading.sampleItalyPowerDemand(seed);
    Instances train = trainTest[0];
    Instances test = trainTest[1];

    // The super basic workflow, pure weka: configure, build, predict
    RandomForest forest = new RandomForest();
    forest.setNumTrees(500);
    forest.setSeed(seed);
    forest.buildClassifier(train);                                  // aka fit, train

    // Count the test instances whose predicted class equals the actual class.
    // distributionForInstance(testInst) would give the probabilities instead (aka predict_proba).
    int numCorrect = 0;
    for (Instance testInst : test) {
        if (forest.classifyInstance(testInst) == testInst.classValue()) {   // aka predict
            numCorrect++;
        }
    }

    double acc = (double) numCorrect / test.numInstances();
    System.out.println("Random Forest accuracy on ItalyPowerDemand: " + acc);

    // All classifiers implement the Classifier interface, which guarantees the
    // buildClassifier, classifyInstance and distributionForInstance methods.
    // Most if not all classifiers should extend AbstractClassifier, which adds
    // a little extra common functionality.
    //
    // experiments.ClassifierLists also lists a number of classifiers. That class
    // is updated over time and may eventually turn into factories etc on the
    // backend; for now it is a way to get a classifier with defined settings
    // (parameters etc). It is used to record the exact parameters used in papers,
    // and to instantiate particular classifiers from a string argument when
    // running on clusters.
    Classifier listedClassifier = ClassifierLists.setClassifierClassic("RandF", seed);
    listedClassifier.buildClassifier(train);
    listedClassifier.distributionForInstance(test.instance(0));
}

Source File: TimeSeriesBagOfFeaturesClassifier.java From AILibs with GNU Affero General Public License v3.0

2 votes

/**
 * Replaces the Random Forest held in the <code>subseriesClf</code> field.
 *
 * @param subseriesClf
 *            the new subseries classifier
 */
public void setSubseriesClf(final RandomForest subseriesClf) {
	final RandomForest replacement = subseriesClf;
	this.subseriesClf = replacement;
}

Source File: TimeSeriesBagOfFeaturesClassifier.java From AILibs with GNU Affero General Public License v3.0

2 votes

/**
 * Replaces the Random Forest held in the <code>finalClf</code> field.
 *
 * @param finalClf
 *            the new final classifier
 */
public void setFinalClf(final RandomForest finalClf) {
	final RandomForest replacement = finalClf;
	this.finalClf = replacement;
}

weka.classifiers.trees.RandomForest Java Examples