Java Code Examples for weka.core.Instances#setClassIndex()
The following examples show how to use
weka.core.Instances#setClassIndex().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MultiResponseModelTrees.java From tsml with GNU General Public License v3.0 | 6 votes |
@Override public void buildClassifier(Instances data) throws Exception { //creating the 2class version of the insts numericClassInsts = new Instances(data); numericClassInsts.setClassIndex(0); //temporary numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes()-1); Attribute newClassAtt = new Attribute("newClassVal"); //numeric class numericClassInsts.insertAttributeAt(newClassAtt, numericClassInsts.numAttributes()); numericClassInsts.setClassIndex(numericClassInsts.numAttributes()-1); //temporary //and building the regressors regressors = new M5P[data.numClasses()]; double[] trueClassVals = data.attributeToDoubleArray(data.classIndex()); for (int c = 0; c < data.numClasses(); c++) { for (int i = 0; i < numericClassInsts.numInstances(); i++) { //if this inst is of the class we're currently handling (c), set new class val to 1 else 0 double cval = trueClassVals[i] == c ? 1 : 0; numericClassInsts.instance(i).setClassValue(cval); } regressors[c] = new M5P(); regressors[c].buildClassifier(numericClassInsts); } }
Example 2
Source File: DatasetLoader.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Loads the anger meta ARFF file and turns it into a two-class dataset.
 *
 * <p>The result keeps attribute 0 of the loaded data and adds a nominal attribute "cls" with
 * the values "0" and "1". An instance is labelled "1" when its original class value is above
 * 0.5 and "0" otherwise.
 *
 * @return the converted dataset, with attribute 1 ("cls") as the class
 * @throws Exception if the ARFF file cannot be loaded
 */
public static Instances loadAngerMetaClassification() throws Exception {
    final Instances source =
        DatasetLoader.loadArff("src/test/resources/numeric/anger.meta.arff");

    // Header: the first source attribute followed by the binary class.
    Attribute binaryClass = new Attribute("cls", Arrays.asList("0", "1"));
    ArrayList<Attribute> header = new ArrayList<>();
    header.add(source.attribute(0));
    header.add(binaryClass);

    Instances binarized = new Instances("anger-classification", header, source.numInstances());
    binarized.setClassIndex(1);

    for (Instance original : source) {
        Instance converted = (Instance) original.copy();
        converted.setDataset(binarized);
        converted.setValue(0, original.stringValue(0));

        String label;
        if (original.classValue() > 0.5) {
            label = "1";
        } else {
            label = "0";
        }
        converted.setValue(1, label);
        binarized.add(converted);
    }
    return binarized;
}
Example 3
Source File: DatasetLoader.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Reads "anger.meta.arff" and re-encodes its class as a nominal 0/1 attribute.
 *
 * <p>The returned dataset is named "anger-classification" and has two attributes: attribute 0
 * of the loaded data and a nominal "cls" attribute. A row receives "1" if its original class
 * value exceeds 0.5, else "0".
 *
 * @return the two-attribute dataset whose class index is 1
 * @throws Exception if loading the ARFF file fails
 */
public static Instances loadAngerMetaClassification() throws Exception {
    final Instances raw = DatasetLoader.loadArff("src/test/resources/numeric/anger.meta.arff");

    ArrayList<Attribute> schema = new ArrayList<>();
    schema.add(raw.attribute(0));
    schema.add(new Attribute("cls", Arrays.asList("0", "1")));

    final int rowCount = raw.numInstances();
    Instances result = new Instances("anger-classification", schema, rowCount);
    result.setClassIndex(1);

    for (int row = 0; row < rowCount; row++) {
        Instance in = raw.instance(row);
        Instance out = (Instance) in.copy();
        out.setDataset(result);
        out.setValue(0, in.stringValue(0));
        // Threshold the original class value at 0.5.
        out.setValue(1, in.classValue() > 0.5 ? "1" : "0");
        result.add(out);
    }
    return result;
}
Example 4
Source File: Reorder.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Sets the input format and derives the output format from the selected attribute indices.
 *
 * <p>The output header contains a copy of each attribute listed in
 * {@code m_SelectedAttributes}, in that order. If the input class attribute is among them,
 * the output class index is its position in the output header; otherwise it stays -1.
 *
 * @param instanceInfo an Instances object describing the input structure (only the header
 *     is used to build the output format)
 * @return always {@code true}: the output format is available immediately
 * @throws Exception if a problem occurs setting the input format
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);

    m_SelectedAttributes = determineIndices(instanceInfo.numAttributes());

    FastVector kept = new FastVector();
    int newClassIndex = -1;
    for (int sourceIndex : m_SelectedAttributes) {
        // Remember where the class attribute ends up in the reordered header.
        if (sourceIndex == instanceInfo.classIndex()) {
            newClassIndex = kept.size();
        }
        Attribute copy = (Attribute) instanceInfo.attribute(sourceIndex).copy();
        kept.addElement(copy);
    }

    initInputLocators(instanceInfo, m_SelectedAttributes);

    Instances outputFormat = new Instances(instanceInfo.relationName(), kept, 0);
    outputFormat.setClassIndex(newClassIndex);
    setOutputFormat(outputFormat);
    return true;
}
Example 5
Source File: WekaUtilTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * For every classifier in the portfolio, stratifies the vowel dataset into 10 folds, checks
 * that the first train/test fold pair together covers the whole dataset, and then
 * cross-validates, trains and evaluates the classifier, printing the percentage correct.
 *
 * @throws Exception if the dataset cannot be read or a classifier fails
 */
@Test
public void checkSplit() throws Exception {
    Instances inst;
    // The reader is only needed while the ARFF file is parsed, so close it right afterwards.
    try (BufferedReader reader = new BufferedReader(new FileReader(VOWEL_ARFF))) {
        inst = new Instances(reader);
    }
    inst.setClassIndex(inst.numAttributes() - 1);

    for (Classifier c : this.portfolio) {

        /* eval for CV */
        inst.stratify(10);
        Instances train = inst.trainCV(10, 0);
        Instances test = inst.testCV(10, 0);
        Assert.assertEquals(train.size() + test.size(), inst.size());

        Evaluation eval = new Evaluation(train);
        eval.crossValidateModel(c, inst, 10, new Random(0));

        c.buildClassifier(train);
        eval.evaluateModel(c, test);
        System.out.println(eval.pctCorrect());
    }
}
Example 6
Source File: DatasetLoader.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/**
 * Loads the CSV file at the given path and marks its last attribute as the class.
 *
 * @param path location of the CSV file to read
 * @return the loaded data with the class index set to the last attribute
 * @throws Exception if the file cannot be read or parsed
 */
public static Instances loadCSV(String path) throws Exception {
    final CSVLoader loader = new CSVLoader();
    loader.setSource(new File(path));

    final Instances loaded = loader.getDataSet();
    final int lastAttribute = loaded.numAttributes() - 1;
    loaded.setClassIndex(lastAttribute);
    return loaded;
}
Example 7
Source File: WekaDeeplearning4jExamples.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
private static void dl4jResnet50() throws Exception { String folderPath = "src/test/resources/nominal/plant-seedlings-small"; ImageDirectoryLoader loader = new ImageDirectoryLoader(); loader.setInputDirectory(new File(folderPath)); Instances inst = loader.getDataSet(); inst.setClassIndex(1); Dl4jMlpClassifier classifier = new Dl4jMlpClassifier(); classifier.setNumEpochs(3); KerasEfficientNet kerasEfficientNet = new KerasEfficientNet(); kerasEfficientNet.setVariation(EfficientNet.VARIATION.EFFICIENTNET_B1); classifier.setZooModel(kerasEfficientNet); ImageInstanceIterator iterator = new ImageInstanceIterator(); iterator.setImagesLocation(new File(folderPath)); classifier.setInstanceIterator(iterator); // Stratify and split the data Random rand = new Random(0); inst.randomize(rand); inst.stratify(5); Instances train = inst.trainCV(5, 0); Instances test = inst.testCV(5, 0); // Build the classifier on the training data classifier.buildClassifier(train); // Evaluate the model on test data Evaluation eval = new Evaluation(test); eval.evaluateModel(classifier, test); // Output some summary statistics System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); }
Example 8
Source File: SentimentAnalyser.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**Decides upon a "disagreed" document by applying the learned model based on the previously build model.*/ private String clarifyOnModel(String tweet){ String out = ""; // get the text-based representation of the document double[] instanceValues = new double[2]; instanceValues[0] = test.attribute(0).addStringValue(tweet); test.add(new SparseInstance(1.0, instanceValues)); try{ stwv.setInputFormat(test); Instances newData = Filter.useFilter(test, stwv); // re-order attributes so that they are compatible with the training set's ones Instances test_instance = reformatText(newData); // find the polarity of the document based on the previously built model test_instance.setClassIndex(0); double[] preds = multiNB.distributionForInstance(test_instance.get(0)); if (preds[0]>0.5) out = "light positive"; else out = "light negative"; } catch (Exception e){ e.printStackTrace(); } test.remove(0); return out; }
Example 9
Source File: PartitionMembership.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (outputFormatPeek() == null) { Instances toFilter = getInputFormat(); // Build the partition generator m_partitionGenerator.generatePartition(toFilter); // Create output dataset FastVector attInfo = new FastVector(); for (int i = 0; i < m_partitionGenerator.numElements(); i++) { attInfo.addElement(new Attribute("partition_" + i)); } if (toFilter.classIndex() >= 0) { attInfo.addElement(toFilter.classAttribute().copy()); } attInfo.trimToSize(); Instances filtered = new Instances(toFilter.relationName() + "_partitionMembership", attInfo, 0); if (toFilter.classIndex() >= 0) { filtered.setClassIndex(filtered.numAttributes() - 1); } setOutputFormat(filtered); // build new dataset for (int i = 0; i < toFilter.numInstances(); i++) { convertInstance(toFilter.instance(i)); } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
Example 10
Source File: Main-SVG.java From Java-for-Data-Science with MIT License | 5 votes |
/**
 * Loads "camping.txt", trains an SMO classifier on the first 14 instances, evaluates it on
 * the following 5 instances and prints the evaluation summary, then classifies one
 * hand-built instance and prints the prediction.
 *
 * <p>Any exception is caught and its stack trace printed.
 */
public Main() {
    // try-with-resources closes the data file once it has been parsed (or parsing failed).
    try (BufferedReader datafile = readDataFile("camping.txt")) {
        Instances data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);

        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);
        Evaluation evaluation = new Evaluation(trainingData);

        SMO smo = new SMO();
        // Fit on the training split only: training on the full data would let the model see
        // the test instances and make the evaluation below meaningless.
        smo.buildClassifier(trainingData);

        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Test instance
        Instance instance = new DenseInstance(3);
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);
        instance.setDataset(data);
        System.out.println("The instance: " + instance);

        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
Example 11
Source File: COEDEvaluationTest.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Downloads the segment dataset from OpenML, creates a stratified split and computes the
 * COED value for the first fold.
 *
 * <p>The test currently always fails on purpose: the final {@code fail(...)} is a reminder
 * that no assertion on the result exists yet.
 *
 * @throws Exception if the dataset cannot be fetched or read
 */
@Test
public void coedEvalTest() throws Exception {
    logger.info("Starting COED evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);

    Instances data;
    // Close the file as soon as the ARFF content has been parsed.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);
    List<Instances> split = WekaUtil.getStratifiedSplit(data, 42, .01f);

    fail("This is a reminder to say that this test yet does not check anything. Result of the computation is: " + EvaluationUtils.calculateCOEDForBatch(split.get(0)));
}
Example 12
Source File: LearnPatternSimilarityClassifier.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/** * Predicts the class by generated segment and segment difference features based * on <code>segments</code> and <code>segmentsDifference</code>. The induced * instances are propagated to the forest of {@link RandomRegressionTree}s * <code>trees</code>. The predicted leaf nodes are used within a 1NN search on * the training leaf nodes to find the nearest instance and taking its class as * prediction value. * * @param univInstance * Univariate instance to be predicted * */ @Override public Integer predict(final double[] univInstance) throws PredictionException { if (!this.isTrained()) { throw new PredictionException("Model has not been built before!"); } if (univInstance == null) { throw new IllegalArgumentException("Instance to be predicted must not be null or empty!"); } int[][] leafNodeCounts = new int[this.trees.length][]; for (int i = 0; i < this.trees.length; i++) { // Generate subseries features Instances seqInstances = new Instances("SeqFeatures", new ArrayList<>(this.attributes), this.lengthPerTree[i]); for (int len = 0; len < this.lengthPerTree[i]; len++) { Instance instance = LearnPatternSimilarityLearningAlgorithm.generateSubseriesFeatureInstance(univInstance, this.segments[i], this.segmentsDifference[i], len); seqInstances.add(instance); } seqInstances.setClassIndex(this.classAttIndexPerTree[i]); leafNodeCounts[i] = new int[this.trees[i].getNosLeafNodes()]; for (int inst = 0; inst < seqInstances.numInstances(); inst++) { LearnPatternSimilarityLearningAlgorithm.collectLeafCounts(leafNodeCounts[i], seqInstances.get(inst), this.trees[i]); } } return this.trainTargets[this.findNearestInstanceIndex(leafNodeCounts)]; }
Example 13
Source File: MLPlan4BigFileInputTester.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Test public void test() throws Exception { // MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(new File("testrsc/openml/41103.arff")); String origDataSrcName = "testrsc/openml/1240.arff"; if (true) { Instances data = new Instances(new FileReader(new File(origDataSrcName))); data.setClassIndex(data.numAttributes() - 1); List<Instances> split = WekaUtil.getStratifiedSplit(data, 0, .7f); ArffSaver saver = new ArffSaver(); saver.setInstances(split.get(0)); saver.setFile(new File(origDataSrcName + ".train")); saver.writeBatch(); saver.setInstances(split.get(1)); saver.setFile(new File(origDataSrcName + ".test")); saver.writeBatch(); System.exit(0); } MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(new File(origDataSrcName + ".train")); mlplan.setTimeout(new Timeout(5, TimeUnit.MINUTES)); mlplan.setLoggerName("testedalgorithm"); long start = System.currentTimeMillis(); Classifier c = mlplan.call(); System.out.println("Observed output: " + c + " after " + (System.currentTimeMillis() - start) + "ms. Now validating the model"); /* check quality */ Instances testData = new Instances(new FileReader(new File(origDataSrcName + ".test"))); testData.setClassIndex(testData.numAttributes() - 1); Evaluation eval = new Evaluation(testData); eval.evaluateModel(c, testData); System.out.println(eval.toSummaryString()); assertNotNull(c); }
Example 14
Source File: DataSetUtils.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Fetches the OpenML dataset with the given id and wraps it, together with one matrix per
 * instance, in a {@code DataSet}.
 *
 * @param datasetID the OpenML id of the dataset to fetch
 * @return a DataSet holding the instances (class index = last attribute) and the matrices
 *     produced by {@code instanceToMatrixByDataSet} for each instance
 * @throws Exception if the dataset cannot be fetched, read or converted
 */
public static DataSet getDataSetByID(final int datasetID) throws Exception {
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(datasetID);
    File file = connector.datasetGet(ds);

    Instances data;
    // Close the downloaded file as soon as its content has been parsed.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);

    List<INDArray> indArrayList = new LinkedList<>();
    for (Instance i : data) {
        indArrayList.add(instanceToMatrixByDataSet(i, datasetID));
    }

    return new DataSet(data, indArrayList);
}
Example 15
Source File: CnnTextEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/** * Test batch correct creation. * * @throws Exception IO error. */ @Test public void testBatches() throws Exception { // Data Instances data = makeData(); data.setClassIndex(data.numAttributes() - 1); final int seed = 1; for (int batchSize : new int[]{1, 2, 5, 10}) { final int actual = countIterations(data, cteii, seed, batchSize); final int expected = (int) Math.ceil(data.numInstances() / ((double) batchSize)); Assert.assertEquals(expected, actual); } }
Example 16
Source File: MLPlanGraphGeneratorTest.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Builds the graph generators under test: the single graph generator ML-Plan derives for
 * the car dataset, paired with the number 10000.
 *
 * @return a one-element list with the graph generator and its associated count
 * @throws IOException if "testrsc/car.arff" cannot be read
 */
@Override
public List<Pair<IGraphGenerator<TFDNode, String>, Integer>> getGraphGenerators() throws IOException {

    /* extract graph generator from mlplan */
    Instances data;
    // Close the ARFF file once it has been parsed.
    try (FileReader reader = new FileReader("testrsc/car.arff")) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);
    MLPlan<IWekaClassifier> mlplan = new MLPlanWekaBuilder().withDataset(new WekaInstances(data)).build();
    IGraphGenerator<TFDNode, String> graphGenerator = mlplan.getSearchProblemInputGenerator().getGraphGenerator();

    /* generate the actual input for the test */
    List<Pair<IGraphGenerator<TFDNode, String>, Integer>> gg = new ArrayList<>();
    gg.add(new Pair<>(graphGenerator, 10000));
    return gg;
}
Example 17
Source File: MLCBMaD.java From meka with GNU General Public License v3.0 | 4 votes |
/**
 * Transforms a single instance into the merged pseudo-label/feature format.
 *
 * <p>The instance is placed in a one-row copy of its dataset, from which a part is extracted
 * via {@code extractPart(..., false)}. A one-row copy of {@code compressedMatrix} has every
 * attribute before its class index set to missing, and the two are merged.
 *
 * @param x the instance to transform; its dataset must be set
 * @return the first (only) instance of the merged dataset, whose class index is
 *     {@code this.size}
 * @throws Exception if extracting or merging fails
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
    // One-row dataset sharing x's header.
    Instances singleton = new Instances(x.dataset());
    singleton.delete();
    singleton.add(x);
    Instances features = this.extractPart(singleton, false);

    // One-row dataset with the header of the compressed matrix.
    Instances pseudoLabels = new Instances(this.compressedMatrix);
    Instance firstRow = pseudoLabels.instance(0);
    pseudoLabels.delete();
    pseudoLabels.add(firstRow);

    // Blank out every attribute in front of the class index.
    final Instance labelRow = pseudoLabels.instance(0);
    final int labelCount = pseudoLabels.classIndex();
    for (int label = 0; label < labelCount; label++) {
        labelRow.setMissing(label);
    }

    Instances merged = Instances.mergeInstances(pseudoLabels, features);
    merged.setClassIndex(this.size);
    return merged.instance(0);
}
Example 18
Source File: RacedIncrementalLogitBoost.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * performs a boosting iteration, returning a new model for the committee * * @param data the data to boost on * @return the new model * @throws Exception if anything goes wrong */ protected Classifier[] boost(Instances data) throws Exception { Classifier[] newModel = AbstractClassifier.makeCopies(m_Classifier, m_NumClasses); // Create a copy of the data with the class transformed into numeric Instances boostData = new Instances(data); boostData.deleteWithMissingClass(); int numInstances = boostData.numInstances(); // Temporarily unset the class index int classIndex = data.classIndex(); boostData.setClassIndex(-1); boostData.deleteAttributeAt(classIndex); boostData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex); boostData.setClassIndex(classIndex); double [][] trainFs = new double [numInstances][m_NumClasses]; double [][] trainYs = new double [numInstances][m_NumClasses]; for (int j = 0; j < m_NumClasses; j++) { for (int i = 0, k = 0; i < numInstances; i++, k++) { while (data.instance(k).classIsMissing()) k++; trainYs[i][j] = (data.instance(k).classValue() == j) ? 
1 : 0; } } // Evaluate / increment trainFs from the classifiers for (int x = 0; x < m_models.size(); x++) { for (int i = 0; i < numInstances; i++) { double [] pred = new double [m_NumClasses]; double predSum = 0; Classifier[] model = (Classifier[]) m_models.elementAt(x); for (int j = 0; j < m_NumClasses; j++) { pred[j] = model[j].classifyInstance(boostData.instance(i)); predSum += pred[j]; } predSum /= m_NumClasses; for (int j = 0; j < m_NumClasses; j++) { trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses-1) / m_NumClasses; } } } for (int j = 0; j < m_NumClasses; j++) { // Set instance pseudoclass and weights for (int i = 0; i < numInstances; i++) { double p = RtoP(trainFs[i], j); Instance current = boostData.instance(i); double z, actual = trainYs[i][j]; if (actual == 1) { z = 1.0 / p; if (z > Z_MAX) { // threshold z = Z_MAX; } } else if (actual == 0) { z = -1.0 / (1.0 - p); if (z < -Z_MAX) { // threshold z = -Z_MAX; } } else { z = (actual - p) / (p * (1 - p)); } double w = (actual - p) / z; current.setValue(classIndex, z); current.setWeight(numInstances * w); } Instances trainData = boostData; if (m_UseResampling) { double[] weights = new double[boostData.numInstances()]; for (int kk = 0; kk < weights.length; kk++) { weights[kk] = boostData.instance(kk).weight(); } trainData = boostData.resampleWithWeights(m_RandomInstance, weights); } // Build the classifier newModel[j].buildClassifier(trainData); } return newModel; }
Example 19
Source File: CNode.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Main - run some tests. */ public static void main(String args[]) throws Exception { Instances D = new Instances(new FileReader(args[0])); Instance x = D.lastInstance(); D.remove(D.numInstances()-1); int L = Integer.parseInt(args[1]); D.setClassIndex(L); double y[] = new double[L]; Random r = new Random(); int s[] = new int[]{1,0,2}; int PA_J[][] = new int[][]{ {},{},{0,1}, }; //MLUtils.randomize(s,r); // MUST GO IN TREE ORDER !! for(int j : s) { int pa_j[] = PA_J[j]; System.out.println("PARENTS = "+Arrays.toString(pa_j)); //MLUtils.randomize(pa_j,r); System.out.println("**** TRAINING ***"); CNode n = new CNode(j,null,pa_j); n.build(D,new SMO()); /* */ //Instances D_ = n.transform(D); //n.T = D_; System.out.println("============== D_"+j+" / class = "+n.T.classIndex()+" ="); System.out.println(""+n.T); System.out.println("**** TESTING ****"); /* Instance x_ = MLUtils.setTemplate(x,(Instance)D_.firstInstance().copy(),D_); for(int pa : pa_j) { //System.out.println(""+map[pa]); x_.setValue(n.map[pa],y[pa]); } //x_.setDataset(T); x_.setClassMissing(); */ //n.T = D_; Instance x_ = n.transform(x,y); System.out.println(""+x_); y[j] = 1; } }
Example 20
Source File: LabelWordVectors.java From AffectiveTweets with GNU General Public License v3.0 | 2 votes |
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { ArrayList<Attribute> atts = new ArrayList<Attribute>(); // Adds all attributes of the inputformat for (int i = 0; i < inputFormat.numAttributes(); i++) { atts.add(inputFormat.attribute(i)); } // The dictionaries of the lexicons are intialized only in the first batch if(!this.isFirstBatchDone()) this.initializeDicts(); for(ArffLexiconWordLabeller lexEval:this.lexiconLabs){ for(Attribute att:lexEval.getAttributes()){ if(att.isNumeric()) atts.add(new Attribute(lexEval.getLexiconName()+"-"+att.name())); else if(att.isNominal()){ List<String> attValues=new ArrayList<String>(); for(int i=0;i<att.numValues();i++){ attValues.add(att.value(i)); } atts.add(new Attribute(lexEval.getLexiconName()+"-"+att.name(),attValues)); } } } Instances result = new Instances(inputFormat.relationName(), atts, 0); // set the class index result.setClassIndex(inputFormat.classIndex()); return result; }