Java Code Examples for weka.core.Instances#size()
The following examples show how to use
weka.core.Instances#size().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CnnTextEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Test getDataSetIterator: walks the data set and the iterator in lock step and checks that
 * the first label of every batch equals the class value of the corresponding instance.
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = makeData();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  Set<Double> labels = new HashSet<>();
  for (Instance inst : data) {
    // Class value as stored in the Weka instance.
    final double expectedLabel = inst.value(data.classIndex());

    // Batch size is 1, so only the first label of the next batch is inspected.
    final DataSet batch = Utils.getNext(it);
    final double actualLabel = batch.getLabels().getDouble(0);

    Assert.assertEquals(expectedLabel, actualLabel, 1e-5);
    labels.add(expectedLabel);
  }
}
Example 2
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Test getDataSetIterator: for the "anger" meta data set, the first label of each batch
 * returned by the iterator must equal the class value of the instance at the same position.
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = DatasetLoader.loadAngerMeta();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  Set<Double> labels = new HashSet<>();
  for (Instance inst : data) {
    // Class value as stored in the Weka instance.
    final double expectedLabel = inst.value(data.classIndex());

    // Batch size is 1, so only the first label of the next batch is inspected.
    final DataSet batch = Utils.getNext(it);
    final double actualLabel = batch.getLabels().getDouble(0);

    Assert.assertEquals(expectedLabel, actualLabel, 1e-5);
    labels.add(expectedLabel);
  }
}
Example 3
Source File: Dl4JMlpFilterTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
protected void checkLayer(Dl4jMlpClassifier clf, Instances instances, String[] transformationLayerNames, String clfPath, boolean useZooModel) throws Exception { Instances activationsExpected = clf.getActivationsAtLayers(transformationLayerNames, instances); Dl4jMlpFilter filter = new Dl4jMlpFilter(); // Load the MNIST III if we're being called on the MNIST dataset (dataset is in meta format (String, class)) if (ImageInstanceIterator.isMetaArff(instances)) filter.setInstanceIterator(DatasetLoader.loadMiniMnistImageIterator()); filter.setSerializedModelFile(new File(clfPath)); filter.setTransformationLayerNames(transformationLayerNames); filter.setInputFormat(instances); filter.setPoolingType(PoolingType.NONE); Instances activationsActual = Filter.useFilter(instances, filter); for (int i = 0; i < activationsActual.size(); i++) { Instance expected = activationsExpected.get(i); Instance actual = activationsActual.get(i); for (int j = 0; j < expected.numAttributes(); j++) { assertEquals(expected.value(j), actual.value(j), 1e-6); } } }
Example 4
Source File: DataSetUtils.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
public static Instance matrixToInstance(final INDArray instance, final Instances refInstances) { if (instance == null || refInstances == null) { throw new IllegalArgumentException("Parameter 'instance' and 'refInstances' must not be null!"); } // Create attributes final ArrayList<Attribute> attributes = new ArrayList<>(); for (int i = 0; i < instance.length(); i++) { final Attribute newAtt = new Attribute("val" + i); attributes.add(newAtt); } final List<String> classValues = IntStream.range(0, refInstances.classAttribute().numValues()).asDoubleStream().mapToObj(String::valueOf).collect(Collectors.toList()); final Attribute classAtt = new Attribute(CLASS_ATT_NAME, classValues); attributes.add(classAtt); final Instances result = new Instances(INSTANCES_DS_NAME, attributes, refInstances.size()); result.setClassIndex(result.numAttributes() - 1); // Initialize instance final Instance inst = new DenseInstance(1, ArrayUtils.addAll(Nd4j.toFlattened(instance).toDoubleVector(), 0)); inst.setDataset(result); return inst; }
Example 5
Source File: ExactIntervalAugSpaceSampler.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
@Override public Instance augSpaceSample() { Instances preciseInsts = this.getPreciseInsts(); int numInsts = preciseInsts.size(); ArrayList<Instance> sampledPoints = new ArrayList<>(); Instance x1 = preciseInsts.get(this.getRng().nextInt(numInsts)); Instance x2 = preciseInsts.get(this.getRng().nextInt(numInsts)); // Assume last attribute is the class int numFeatures = preciseInsts.numAttributes() - 1; for (Instance inst : preciseInsts) { boolean inInterval = true; for (int att = 0; att < numFeatures && inInterval; att++) { if (inst.value(att) < Math.min(x1.value(att), x2.value(att)) || inst.value(att) > Math.max(x1.value(att), x2.value(att))) { inInterval = false; } } if (inInterval) { sampledPoints.add(inst); } } return generateAugPoint(sampledPoints); }
Example 6
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Test getDataSetIterator: for the "anger" meta data set, the first label of each batch
 * returned by the iterator must equal the class value of the instance at the same position.
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = DatasetLoader.loadAngerMeta();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  Set<Double> labels = new HashSet<>();
  int position = 0;
  while (position < data.size()) {
    final Instance inst = data.get(position);
    final double expectedLabel = inst.value(data.classIndex());

    // Batch size is 1, so only the first label of the next batch is inspected.
    final double actualLabel = Utils.getNext(it).getLabels().getDouble(0);

    Assert.assertEquals(expectedLabel, actualLabel, 1e-5);
    labels.add(expectedLabel);
    position++;
  }
}
Example 7
Source File: WekaInstancesTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Loads the same data set file twice, wraps both copies in {@link WekaInstances} and checks
 * that the copies agree instance by instance and as a whole (equals, hashCode, containment).
 *
 * @throws Exception if the data set file cannot be read
 */
@Test
public void testEqualnessOfTwoCopiesOfSameDataset() throws Exception {
    /* read each copy through its own reader and close the reader once the data is loaded */
    Instances ds1;
    try (FileReader reader = new FileReader(this.dataset)) {
        ds1 = new Instances(reader);
    }
    ds1.setClassIndex(ds1.numAttributes() - 1);
    WekaInstances wrapped1 = new WekaInstances(ds1);

    Instances ds2;
    try (FileReader reader = new FileReader(this.dataset)) {
        ds2 = new Instances(reader);
    }
    ds2.setClassIndex(ds2.numAttributes() - 1);
    WekaInstances wrapped2 = new WekaInstances(ds2);

    /* first conduct an instance-wise comparison and a mutual containment check */
    int n = ds1.size();
    assertEquals("Copy of dataset has different length than the original.", n, ds2.size());
    for (int i = 0; i < n; i++) {
        IWekaInstance i1 = wrapped1.get(i);
        IWekaInstance i2 = wrapped2.get(i);
        assertEquals("Hash codes of single instance don't match!", i1.hashCode(), i2.hashCode());
        assertEquals("Comparing the instances with equals yields false.", i1, i2);
        assertTrue("The second dataset does not contain " + i1 + ", which is contained in the first.", wrapped2.contains(i1));
        assertTrue("The first dataset does not contain " + i2 + ", which is contained in the second.", wrapped1.contains(i2));
    }

    /* now compare the entire dataset */
    assertEquals("Hash codes of entire dataset don't match!", wrapped1.hashCode(), wrapped2.hashCode());
    assertEquals("Comparing the datasets with equals yields false.", wrapped1, wrapped2);
}
Example 8
Source File: RLTunedKNNSetup.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public void accept(Instances trainData) { neighbourCount = new Box<>(1); // must start at 1 otherwise the loocv produces no train estimate paramCount = new Box<>(0); longestExploreTimeNanos = 0; id = 0; longestExploitTimeNanos = 0; nextImproveableBenchmarks = new HashSet<>(); improveableBenchmarks = new HashSet<>(); unimproveableBenchmarks = new HashSet<>(); switchImproveableBenchmarks(); finalBenchmarks = PrunedMultimap.desc(ArrayList::new); finalBenchmarks.setSoftLimit(1); final int seed = rlTunedClassifier.getSeed(); paramSpace = paramSpaceBuilder.apply(trainData); paramSetIterator = new RandomListIterator<>(this.paramSpace, seed).setRemovedOnNext(true); fullParamSpaceSize = this.paramSpace.size(); fullNeighbourhoodSize = trainData.size(); // todo check all seeds set maxNeighbourhoodSize = findLimit(fullNeighbourhoodSize, neighbourhoodSizeLimit, neighbourhoodSizeLimitPercentage); maxParamSpaceSize = findLimit(fullParamSpaceSize, paramSpaceSizeLimit, paramSpaceSizeLimitPercentage); if(!incrementalMode) { neighbourCount.set(maxNeighbourhoodSize); } // transform classifiers into benchmarks explorer = new ParamExplorer(); // setup an iterator to improve benchmarks exploiter = new NeighbourExploiter(); stategy = new LeeStategy(); agent = new KnnAgent(); // set corresponding iterators in the incremental tuned classifier rlTunedClassifier.setAgent(agent); rlTunedClassifier.setEnsembler(Ensembler.single()); // todo make sure the seeds are set for everything }
Example 9
Source File: PartitionedMultiFilter.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Tests whether each filter can handle the data: for every configured range, a data set with
 * the same header (and a copy of the first instance, if there is one) is reduced to that
 * range and passed to the corresponding filter as its input format.
 *
 * @param instanceInfo the data to test
 * @throws Exception if the test fails
 */
protected void testInputFormat(Instances instanceInfo) throws Exception {
    int idx = 0;
    while (idx < getRanges().length) {
        // Start from the header only, then add at most one sample row.
        Instances sample = new Instances(instanceInfo, 0);
        if (instanceInfo.size() > 0) {
            Instance firstCopy = (Instance) instanceInfo.get(0).copy();
            sample.add(firstCopy);
        }

        Range range = getRanges()[idx];
        range.setUpper(instanceInfo.numAttributes() - 1);

        Instances subset = generateSubset(sample, range);
        getFilters()[idx].setInputFormat(subset);
        idx++;
    }
}
Example 10
Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Collects the class value of every instance into an array, in data set order.
 *
 * @param inst the data set to read the class values from
 * @return an array with one class value per instance
 */
public static double[] getClassesAsArray(final Instances inst) {
    return inst.stream().mapToDouble(instance -> instance.classValue()).toArray();
}
Example 11
Source File: RankingByPairwiseComparison.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Trains one pairwise classifier for every unordered pair of label attributes.
 *
 * <p>For each pair (a, b) a copy of the filtered data set is made and its last attribute is
 * set, row by row, to {@code "true"} if the value of label a is greater than the value of
 * label b in the original data set and to {@code "false"} otherwise. That last attribute is
 * then used as the class for training the base learner.
 *
 * @param dataset the training data including the label attributes
 * @param labels passed to {@code getLabelIndices} to determine the label attributes
 * @throws Exception a {@code TrainingException} if building any pairwise classifier fails;
 *                   other exceptions may come from determining the labels or applying the filters
 */
public void fit(final Instances dataset, final int labels) throws Exception {
    this.labelIndices = getLabelIndices(labels, dataset);
    this.labelIndices.stream().map(x -> dataset.attribute(x).name()).forEach(this.labelSet::add);
    Instances plainPWDataset = this.applyFiltersToDataset(dataset);

    try {
        final int numLabels = this.labelIndices.size();
        for (int first = 0; first < numLabels - 1; first++) {
            for (int second = first + 1; second < numLabels; second++) {
                PairWiseClassifier pwc = new PairWiseClassifier();
                pwc.a = dataset.attribute(this.labelIndices.get(first)).name();
                pwc.b = dataset.attribute(this.labelIndices.get(second)).name();
                pwc.c = AbstractClassifier.forName(this.config.getBaseLearner(), null);

                // Fresh copy per pair so that the target column can be overwritten.
                Instances pwDataset = new Instances(plainPWDataset);
                final int targetIndex = pwDataset.numAttributes() - 1;
                for (int row = 0; row < pwDataset.size(); row++) {
                    String value = dataset.get(row).value(this.labelIndices.get(first)) > dataset.get(row).value(this.labelIndices.get(second))
                            ? "true"
                            : "false";
                    pwDataset.get(row).setValue(targetIndex, value);
                }
                pwDataset.setClassIndex(pwDataset.numAttributes() - 1);

                pwc.c.buildClassifier(pwDataset);
                this.pwClassifiers.add(pwc);
            }
        }
    } catch (Exception e) {
        throw new TrainingException("Could not build ranker", e);
    }
}
Example 12
Source File: RandomStratifiedIndexSampler.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Points the sampler at a new data set: rebuilds the per-class index lists and both class
 * distributions, resets the sample counter and sets the maximum count to the data set size.
 *
 * @param instances the data set to sample from
 */
public void setInstances(Instances instances) {
    this.instancesByClass = indexByClass(instances);

    // Computed twice on purpose of keeping two separate results, as in the original code.
    this.classDistribution = classDistribution(instances);
    this.classSamplingProbabilities = classDistribution(instances);

    this.count = 0;
    final int total = instances.size();
    this.maxCount = total;
}
Example 13
Source File: RandomStratifiedSampler.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Points the sampler at a new data set: regroups the instances by class, recomputes both
 * class distributions, resets the sample counter and sets the maximum count to the data set
 * size.
 *
 * @param instances the data set to sample from
 */
public void setInstances(Instances instances) {
    this.instancesByClass = instancesByClass(instances);

    // Two separate calls, so the two fields receive separately computed results.
    this.classDistribution = classDistribution(instances);
    this.classSamplingProbabilities = classDistribution(instances);

    this.count = 0;
    final int total = instances.size();
    this.maxCount = total;
}
Example 14
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Groups the positions of the instances by their class value.
 *
 * <p>The number of classes is taken from the data set itself rather than from its first
 * instance, so an empty data set yields one empty list per class instead of failing on
 * {@code get(0)}.
 *
 * @param instances the data set to index; its class attribute must be set
 * @return a list with one entry per class; entry {@code c} holds the indices (in data set
 *         order) of all instances whose class value, cast to int, is {@code c}
 */
public static List<List<Integer>> indexByClass(Instances instances) {
    final int numClasses = instances.numClasses();

    final List<List<Integer>> instancesByClass = new ArrayList<>(numClasses);
    for (int i = 0; i < numClasses; i++) {
        instancesByClass.add(new ArrayList<>());
    }

    for (int i = 0; i < instances.size(); i++) {
        final int classIndex = (int) instances.get(i).classValue();
        instancesByClass.get(classIndex).add(i);
    }
    return instancesByClass;
}
Example 15
Source File: MLPipeline.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Classifies every instance of the given data set, in order.
 *
 * @param arg0 the instances to classify
 * @return one prediction per instance, at the same position as the instance
 * @throws Exception if classifying any single instance fails
 */
public double[] classifyInstances(final Instances arg0) throws Exception {
    final double[] answers = new double[arg0.size()];
    int idx = 0;
    while (idx < answers.length) {
        answers[idx] = this.classifyInstance(arg0.get(idx));
        idx++;
    }
    return answers;
}
Example 16
Source File: TableMaker.java From NLIWOD with GNU Affero General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff"); BufferedReader reader = new BufferedReader(new FileReader(datapath.toString())); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(6); //Change To Classifier of Choice PSt Classifier = new PSt(); Classifier.buildClassifier(data); JSONObject qald6test = Utils.loadTestQuestions(); JSONArray questions = (JSONArray) qald6test.get("questions"); ArrayList<String> testQuestions = Lists.newArrayList(); for(int i = 0; i < questions.size(); i++){ JSONObject questionData = (JSONObject) questions.get(i); JSONArray questionStrings = (JSONArray) questionData.get("question"); JSONObject questionEnglish = (JSONObject) questionStrings.get(0); testQuestions.add((String) questionEnglish.get("string")); } ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" ); double avef = 0; double[] systemavef = {0,0,0,0,0,0,0}; for(int i=0; i<data.size(); i++){ String tmp = ""; tmp += i +"\t &" + testQuestions.get(i); double bestf = 0; for(String system: systems){ double p = Float.parseFloat(Utils.loadSystemP(system).get(i)); double r = Float.parseFloat(Utils.loadSystemR(system).get(i)); double f = 0; if(!(p==0&&r==0)){ f = 2*p*r/(p+r); } if(f > bestf){ bestf = f; } tmp += "\t &" + Math.floor(f * 100) / 100; systemavef[systems.indexOf(system)] += f/data.size(); } systemavef[6] += bestf/data.size(); tmp += "\t &" + Math.floor(bestf * 100) / 100; double[] confidences = Classifier.distributionForInstance(data.get(i)); System.out.println(Arrays.toString(confidences)); int argmax = -1; double max = -1; for(int j = 0; j < 6; j++){ if(confidences[j]>max){ max = confidences[j]; argmax = j; } } String sys2ask = systems.get(systems.size() - argmax -1); double systemp = Float.parseFloat(Utils.loadSystemP(sys2ask).get(i)); double systemr = 
Float.parseFloat(Utils.loadSystemR(sys2ask).get(i)); double systemf = 0; if(!(systemp==0&&systemr==0)){ systemf = 2*systemp*systemr/(systemp+systemr); } avef += systemf; tmp += "\t &" + Math.floor(systemf * 100) / 100; tmp += "\\\\"; System.out.println(tmp); } System.out.println(Arrays.toString(systemavef)); System.out.println(avef/data.size()); }
Example 17
Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Computes the share of instances counted by {@code getNumberOfInstancesFromClass} for the
 * given class names, relative to the size of the data set.
 *
 * <p>The division is carried out in double precision so that the result is not limited to
 * float accuracy before being returned as a double.
 *
 * @param data the data set
 * @param cs the class names to count
 * @return the count divided by {@code data.size()}
 */
public static double getRelativeNumberOfInstancesFromClass(final Instances data, final Collection<String> cs) {
    return getNumberOfInstancesFromClass(data, cs) / (double) data.size();
}
Example 18
Source File: SequenceStatsCache.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Precomputes per-sequence statistics for the training data: minimum and maximum value with
 * their positions, whether they sit at the first or last position, and the positions ordered
 * by descending absolute value. The last attribute of each instance is excluded. Envelope
 * arrays and the "last window computed" markers are allocated and initialised as well.
 *
 * @param train the training sequences
 * @param startingWindow the initial value of the current window
 */
public SequenceStatsCache(final Instances train, final int startingWindow) {
    this.train = train;
    final int nSequences = train.size();
    final int length = train.numAttributes() - 1;

    this.LEs = new ArrayList<>(nSequences);
    this.UEs = new ArrayList<>(nSequences);

    // -1 marks "nothing computed yet" for each of the three window caches.
    this.lastWindowComputed = new double[nSequences];
    this.lastERPWindowComputed = new double[nSequences];
    this.lastLCSSWindowComputed = new double[nSequences];
    Arrays.fill(this.lastWindowComputed, -1);
    Arrays.fill(this.lastERPWindowComputed, -1);
    Arrays.fill(this.lastLCSSWindowComputed, -1);

    this.currentWindow = startingWindow;

    this.mins = new double[nSequences];
    this.maxs = new double[nSequences];
    this.indexMins = new int[nSequences];
    this.indexMaxs = new int[nSequences];
    this.isMinFirst = new boolean[nSequences];
    this.isMinLast = new boolean[nSequences];
    this.isMaxFirst = new boolean[nSequences];
    this.isMaxLast = new boolean[nSequences];
    this.indicesSortedByAbsoluteValue = new IndexedDouble[nSequences][length];

    final int lastPosition = length - 1;
    for (int seq = 0; seq < nSequences; seq++) {
        double lowest = Double.POSITIVE_INFINITY;
        double highest = Double.NEGATIVE_INFINITY;
        int lowestAt = -1;
        int highestAt = -1;

        for (int pos = 0; pos < length; pos++) {
            final double val = train.get(seq).value(pos);
            if (val > highest) {
                highest = val;
                highestAt = pos;
            }
            if (val < lowest) {
                lowest = val;
                lowestAt = pos;
            }
            this.indicesSortedByAbsoluteValue[seq][pos] = new IndexedDouble(pos, Math.abs(val));
        }

        this.indexMaxs[seq] = highestAt;
        this.indexMins[seq] = lowestAt;
        this.mins[seq] = lowest;
        this.maxs[seq] = highest;
        this.isMinFirst[seq] = lowestAt == 0;
        this.isMinLast[seq] = lowestAt == lastPosition;
        this.isMaxFirst[seq] = highestAt == 0;
        this.isMaxLast[seq] = highestAt == lastPosition;

        // Largest absolute value first.
        Arrays.sort(this.indicesSortedByAbsoluteValue[seq], (a, b) -> Double.compare(b.value, a.value));

        this.LEs.add(new double[length]);
        this.UEs.add(new double[length]);
    }
}
Example 19
Source File: UnsupervisedShapelets.java From tsml with GNU General Public License v3.0 | 4 votes |
private void extractUShapelets(Instances data){ int[] shapeletLengths = {25, 50}; if (data.numAttributes() < 50){ shapeletLengths = new int[]{data.numAttributes()/2}; } shapelets = new ArrayList(); numInstances = data.size(); Instance inst = data.firstInstance(); boolean finished = false; while (!finished){ ArrayList<UShapelet> shapeletCandidates = new ArrayList(); //Finds all candidate shapelets on the selected instance for (int i = 0; i < shapeletLengths.length; i++){ for (int n = 0; n < inst.numAttributes() - shapeletLengths[i]; n++){ UShapelet candidate = new UShapelet(n, shapeletLengths[i], inst); candidate.computeGap(data); shapeletCandidates.add(candidate); } } double maxGap = -1; int maxGapIndex = -1; //Finds the shapelet with the highest gap value for (int i = 0; i < shapeletCandidates.size(); i++){ if (shapeletCandidates.get(i).gap > maxGap){ maxGap = shapeletCandidates.get(i).gap; maxGapIndex = i; } } //Adds the shapelet with the best gap value to the pool of shapelets UShapelet best = shapeletCandidates.get(maxGapIndex); shapelets.add(best); double[] distances = best.computeDistances(data); ArrayList<Double> lesserDists = new ArrayList(); double maxDist = -1; int maxDistIndex = -1; //Finds the instance with the max dist to the shapelet and all with a dist lower than the distance used //to generate the gap value. 
for (int i = 0; i < distances.length; i++){ if (distances[i] < best.dt){ lesserDists.add(distances[i]); } else if (distances[i] > maxDist){ maxDist = distances[i]; maxDistIndex = i; } } //Use max dist instance to generate new shapelet and remove low distance instances if (lesserDists.size() == 1){ finished = true; } else{ inst = data.get(maxDistIndex); double mean = mean(lesserDists); double cutoff = mean + standardDeviation(lesserDists, mean); Instances newData = new Instances(data, 0); for (int i = 0; i < data.numInstances(); i++){ if (distances[i] >= cutoff){ newData.add(data.get(i)); } } data = newData; if (data.size() == 1){ finished = true; } } } }
Example 20
Source File: CrossValidationExperiments.java From NLIWOD with GNU Affero General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff"); BufferedReader reader = new BufferedReader(new FileReader(datapath.toString())); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(6); ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" ); int seed = 133; // Change to 100 for leave-one-out CV int folds = 10; Random rand = new Random(seed); Instances randData = new Instances(data); randData.randomize(rand); float cv_ave_f = 0; for(int n=0; n < folds; n++){ Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); //Change to the Classifier of your choice CDN Classifier = new CDN(); Classifier.buildClassifier(train); float ave_p = 0; float ave_r = 0; for(int j = 0; j < test.size(); j++){ Instance ins = test.get(j); int k = 0; for(int l=0; l < data.size(); l++){ Instance tmp = data.get(l); if(tmp.toString().equals(ins.toString())){ k = l; } } double[] confidences = Classifier.distributionForInstance(ins); int argmax = -1; double max = -1; for(int i = 0; i < 6; i++){ if(confidences[i]>max){ max = confidences[i]; argmax = i; } } String sys2ask = systems.get(systems.size() - argmax -1); ave_p += Float.parseFloat(Utils.loadSystemP(sys2ask).get(k)); ave_r += Float.parseFloat(Utils.loadSystemR(sys2ask).get(k)); } double p = ave_p/test.size(); double r = ave_r/test.size(); double fmeasure = 0; if(p>0&&r>0){fmeasure = 2*p*r/(p + r);} System.out.println("macro F on fold " + n + ": " + fmeasure); cv_ave_f += fmeasure/folds; } System.out.println("macro F average: " + cv_ave_f); System.out.println('\n'); }