Java Code Examples for weka.core.Instances#get()
The following examples show how to use
weka.core.Instances#get().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ExactIntervalAugSpaceSampler.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
@Override public Instance augSpaceSample() { Instances preciseInsts = this.getPreciseInsts(); int numInsts = preciseInsts.size(); ArrayList<Instance> sampledPoints = new ArrayList<>(); Instance x1 = preciseInsts.get(this.getRng().nextInt(numInsts)); Instance x2 = preciseInsts.get(this.getRng().nextInt(numInsts)); // Assume last attribute is the class int numFeatures = preciseInsts.numAttributes() - 1; for (Instance inst : preciseInsts) { boolean inInterval = true; for (int att = 0; att < numFeatures && inInterval; att++) { if (inst.value(att) < Math.min(x1.value(att), x2.value(att)) || inst.value(att) > Math.max(x1.value(att), x2.value(att))) { inInterval = false; } } if (inInterval) { sampledPoints.add(inst); } } return generateAugPoint(sampledPoints); }
Example 2
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Test getDataSetIterator with a nominal class attribute: each batch (size 1) must carry the
 * label of the matching instance, and the iterator's label set must equal the labels seen.
 */
@Test
public void testGetIteratorNominalClass() throws Exception {
    final Instances data = DatasetLoader.loadAngerMetaClassification();
    final int batchSize = 1;
    final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

    Set<Integer> seenLabels = new HashSet<>();
    for (Instance inst : data) {
        // The class value is stored as a string holding an integer label.
        int expectedLabel = Integer.parseInt(inst.stringValue(data.classIndex()));
        final DataSet batch = Utils.getNext(it);
        int iteratorLabel = batch.getLabels().argMax().getInt(0);
        Assert.assertEquals(expectedLabel, iteratorLabel);
        seenLabels.add(expectedLabel);
    }

    final Set<Integer> iteratorLabels = it.getLabels()
            .stream()
            .map(s -> Double.valueOf(s).intValue())
            .collect(Collectors.toSet());

    Assert.assertEquals(2, seenLabels.size());
    Assert.assertTrue(seenLabels.containsAll(iteratorLabels));
    Assert.assertTrue(iteratorLabels.containsAll(seenLabels));
}
Example 3
Source File: AbstractVectorClusterer.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Normalises every attribute except the last one of {@code data} in place by subtracting the
 * attribute mean and dividing by the attribute standard deviation. The per-attribute means and
 * standard deviations are stored in {@code attributeMeans} and {@code attributeStdDevs}.
 *
 * @param data the instances to normalise; modified in place
 * @throws Exception if a class attribute is set and it is not the final attribute
 */
protected void normaliseData(Instances data) throws Exception{
    if (data.classIndex() >= 0 && data.classIndex() != data.numAttributes()-1){
        throw new Exception("Class attribute is available and not the final attribute.");
    }

    int numToNormalise = data.numAttributes()-1;
    attributeMeans = new double[numToNormalise];
    attributeStdDevs = new double[numToNormalise];

    for (int i = 0; i < numToNormalise; i++){
        // attributeStats computes summary statistics over the whole data set, so fetch it once
        // per attribute instead of once per statistic.
        weka.core.AttributeStats stats = data.attributeStats(i);
        attributeMeans[i] = stats.numericStats.mean;
        attributeStdDevs[i] = stats.numericStats.stdDev;

        // NOTE(review): a standard deviation of 0 (constant attribute) makes the division below
        // yield NaN; confirm whether callers expect that before guarding it here.
        for (int n = 0; n < data.size(); n++){
            Instance instance = data.get(n);
            instance.setValue(i, (instance.value(i) - attributeMeans[i]) / attributeStdDevs[i]);
        }
    }
}
Example 4
Source File: Dl4JMlpFilterTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
protected void checkLayer(Dl4jMlpClassifier clf, Instances instances, String[] transformationLayerNames, String clfPath, boolean useZooModel) throws Exception { Instances activationsExpected = clf.getActivationsAtLayers(transformationLayerNames, instances); Dl4jMlpFilter filter = new Dl4jMlpFilter(); // Load the MNIST III if we're being called on the MNIST dataset (dataset is in meta format (String, class)) if (ImageInstanceIterator.isMetaArff(instances)) filter.setInstanceIterator(DatasetLoader.loadMiniMnistImageIterator()); filter.setSerializedModelFile(new File(clfPath)); filter.setTransformationLayerNames(transformationLayerNames); filter.setInputFormat(instances); filter.setPoolingType(PoolingType.NONE); Instances activationsActual = Filter.useFilter(instances, filter); for (int i = 0; i < activationsActual.size(); i++) { Instance expected = activationsExpected.get(i); Instance actual = activationsActual.get(i); for (int j = 0; j < expected.numAttributes(); j++) { assertEquals(expected.value(j), actual.value(j), 1e-6); } } }
Example 5
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Test getDataSetIterator with a numeric class attribute: each batch (size 1) must carry the
 * class value of the matching instance, within a tolerance of 1e-5.
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
    final Instances data = DatasetLoader.loadAngerMeta();
    final int batchSize = 1;
    final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

    Set<Double> seenLabels = new HashSet<>();
    for (Instance inst : data) {
        double expectedLabel = inst.value(data.classIndex());
        final DataSet batch = Utils.getNext(it);
        double iteratorLabel = batch.getLabels().getDouble(0);
        Assert.assertEquals(expectedLabel, iteratorLabel, 1e-5);
        seenLabels.add(expectedLabel);
    }
}
Example 6
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/**
 * Test getDataSetIterator on a nominal-class data set. Walks the data and the iterator in
 * lock-step (batch size 1), compares labels pairwise, then compares the label sets.
 */
@Test
public void testGetIteratorNominalClass() throws Exception {
    final Instances data = DatasetLoader.loadAngerMetaClassification();
    final int batchSize = 1;
    final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

    Set<Integer> labelsFromData = new HashSet<>();
    for (Instance row : data) {
        String rawLabel = row.stringValue(data.classIndex());
        int label = Integer.parseInt(rawLabel);

        final DataSet next = Utils.getNext(it);
        int itLabel = next.getLabels().argMax().getInt(0);

        Assert.assertEquals(label, itLabel);
        labelsFromData.add(label);
    }

    // The iterator reports its labels as strings holding numbers; reduce them to ints.
    final Set<Integer> labelsFromIterator = it.getLabels()
            .stream()
            .map(s -> Double.valueOf(s).intValue())
            .collect(Collectors.toSet());

    Assert.assertEquals(2, labelsFromData.size());
    Assert.assertTrue(labelsFromData.containsAll(labelsFromIterator));
    Assert.assertTrue(labelsFromIterator.containsAll(labelsFromData));
}
Example 7
Source File: AugSpaceAllPairs.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override public Instances apply(final Instances input) { int nPrecise = input.numInstances(); ArrayList<Attribute> augAttrs = new ArrayList<>(input.numAttributes() * 2); for (int attr = 0; attr < input.numAttributes() - 1; attr++) { augAttrs.add(new Attribute("x" + attr + "_lower")); augAttrs.add(new Attribute("x" + attr + "_upper")); } augAttrs.add(new Attribute("y_min")); augAttrs.add(new Attribute("y_max")); int nAllPairs = (nPrecise * (nPrecise - 1)) / 2; Instances augInstances = new Instances("aug_space_train", augAttrs, nAllPairs); for (int i = 0; i < nPrecise; i++) { for (int j = 0; j < nPrecise; j++) { ArrayList<Instance> sampledPoints = new ArrayList<>(); Instance x1 = input.get(i); Instance x2 = input.get(j); // Assume last attribute is the class int numFeatures = input.numAttributes() - 1; for (Instance inst : input) { boolean inInterval = true; for (int att = 0; att < numFeatures && inInterval; att++) { if (inst.value(att) < Math.min(x1.value(att), x2.value(att)) || inst.value(att) > Math.max(x1.value(att), x2.value(att))) { inInterval = false; } } if (inInterval) { sampledPoints.add(inst); } } augInstances.add(AbstractAugmentedSpaceSampler.generateAugPoint(sampledPoints)); } } return augInstances; }
Example 8
Source File: MultivariateShapeletTransformClassifier.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public double classifyInstance(Instance ins) throws Exception{ format.add(ins); Instances temp = doTransform ? transform.process(format) : format; //Delete redundant for(int del:redundantFeatures) temp.deleteAttributeAt(del); Instance test = temp.get(0); format.remove(0); return ensemble.classifyInstance(test); }
Example 9
Source File: KMeans.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes the distance from each of the first {@code numInstances} instances of {@code data}
 * to each of the {@code k} cluster centres using {@code distFunc}.
 *
 * @param data the instances to measure
 * @return a {@code k} x {@code numInstances} array indexed as {@code [centre][instance]}
 */
private double[][] createCenterDistances(Instances data){
    double[][] distances = new double[k][numInstances];

    for (int instIdx = 0; instIdx < numInstances; instIdx++){
        Instance current = data.get(instIdx);
        int centreIdx = 0;
        while (centreIdx < k){
            Instance centre = clusterCenters.get(centreIdx);
            distances[centreIdx][instIdx] = distFunc.distance(current, centre);
            centreIdx++;
        }
    }

    return distances;
}
Example 10
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Finds the instance in {@code data} with the smallest distance to {@code inst}.
 * On ties the earliest instance wins, because only a strictly smaller distance replaces the
 * current minimum.
 *
 * @param data candidate instances; must contain at least one instance, since index 0 is read
 *             unconditionally
 * @param inst the instance to measure against
 * @param dist the distance function to use
 * @return a pair of the closest instance and its distance to {@code inst}
 */
public static Pair<Instance, Double> findMinDistance(Instances data, Instance inst, DistanceFunction dist){
    Instance minI = data.get(0);
    double min = dist.distance(minI, inst);

    for (int i = 1; i < data.numInstances(); i++) {
        // Look the candidate up once and reuse it for both the distance and the result.
        Instance candidate = data.get(i);
        double temp = dist.distance(candidate, inst);
        if (temp < min){
            min = temp;
            minI = candidate;
        }
    }

    // Diamond instead of the raw type, so the result is checked against the declared return type.
    return new Pair<>(minI, min);
}
Example 11
Source File: ClusteringUtilities.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Builds a lower-triangular distance matrix (without the diagonal) for {@code data}.
 * Row {@code i} has length {@code i}; entry {@code [i][n]} with {@code n < i} is the distance
 * between instance {@code i} and instance {@code n}.
 *
 * @param data the instances to compare; also passed to {@code distFunc.setInstances}
 * @param distFunc the distance function to use
 * @return the triangular matrix; row 0 is an empty array
 */
public static double[][] createDistanceMatrix(Instances data, DistanceFunction distFunc){
    double[][] distMatrix = new double[data.numInstances()][];
    distFunc.setInstances(data);

    // Start at 0 so row 0 becomes an empty array instead of staying null; the inner loop does
    // not run for it, so all other rows are unchanged.
    for (int i = 0; i < data.numInstances(); i++){
        distMatrix[i] = new double[i];
        Instance first = data.get(i);

        for (int n = 0; n < i; n++){
            distMatrix[i][n] = distFunc.distance(first, data.get(n));
        }
    }

    return distMatrix;
}
Example 12
Source File: AutoFEWekaPipeline.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Transforms a single array by wrapping it in a one-element {@code DataSet} and delegating to
 * the data-set overload of {@code transformData}.
 *
 * @param instance the array to transform
 * @param refInstances the reference instances passed to the {@code DataSet} constructor
 * @return the first instance of the transformed data
 * @throws InterruptedException if the delegated transformation is interrupted
 */
@Override
public Instance transformData(final INDArray instance, final Instances refInstances) throws InterruptedException {
    List<INDArray> single = new LinkedList<>();
    single.add(instance);

    DataSet wrapped = new DataSet(refInstances, single);
    Instances transformed = this.transformData(wrapped);
    return transformed.get(0);
}
Example 13
Source File: Dl4JMlpFilterTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
protected void checkZooModelMNIST(AbstractZooModel zooModel, boolean isMeta) throws Exception { Dl4jMlpFilter myFilter = new Dl4jMlpFilter(); Dl4jMlpClassifier clf = new Dl4jMlpClassifier(); Instances instances = null; AbstractInstanceIterator iterator = null; // Load the MNIST meta arff with ImageInstanceIterator if (isMeta) { instances = DatasetLoader.loadMiniMnistMeta(); iterator = DatasetLoader.loadMiniMnistImageIterator(); } else { // Load the Convolutional version of MNIST instances = DatasetLoader.loadMiniMnistArff(); iterator = new ConvolutionInstanceIterator(); ((ConvolutionInstanceIterator) iterator).setNumChannels(1); ((ConvolutionInstanceIterator) iterator).setHeight(28); ((ConvolutionInstanceIterator) iterator).setWidth(28); } clf.setInstanceIterator(iterator); myFilter.setInstanceIterator(iterator); // Testing pretrained model, no point training for 1 epoch clf.setNumEpochs(0); clf.setZooModel(zooModel); clf.buildClassifier(instances); Instances activationsExpected = clf.getActivationsAtLayers(new String[] { zooModel.getFeatureExtractionLayer() }, instances); myFilter.setZooModelType(zooModel); myFilter.setInputFormat(instances); Instances activationsActual = Filter.useFilter(instances, myFilter); for (int i = 0; i < activationsActual.size(); i++) { Instance expected = activationsExpected.get(i); Instance actual = activationsActual.get(i); for (int j = 0; j < expected.numAttributes(); j++) { assertEquals(expected.value(j), actual.value(j), 1e-6); } } }
Example 14
Source File: DatabaseConnectorTest.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Test public void testGetInstances() throws RetrieveInstancesFromDatabaseFailedException { Database db = DBUtils.deserializeFromFile(DATABASE_MODEL_FILE); Table customer = DBUtils.getTableByName("Customer", db); Table product = DBUtils.getTableByName("Product", db); // Select two features (one forward, one backward) List<AbstractFeature> selectedFeatures = new ArrayList<>(); ForwardFeature firstName = new ForwardFeature(DBUtils.getAttributeByName("FirstName", customer)); BackwardFeature price = new BackwardFeature(DBUtils.getAttributeByName("Price", product)); Path path = price.getPath(); path.addPathElement(new BackwardRelationship("Orders", "Product", "OrderId"), AggregationFunction.MAX); path.addPathElement(new BackwardRelationship("Customer", "Orders", "CustomerId"), AggregationFunction.AVG); selectedFeatures.add(firstName); selectedFeatures.add(price); // Get instances DatabaseConnector dbCon = new DatabaseConnectorImpl(db); Instances instances = dbCon.getInstances(selectedFeatures); // Cleanup in any case dbCon.cleanup(); // Check correctness for first instance Instance i = instances.get(0); Attribute a = i.attribute(0); assertEquals("Alina", a.value((int) i.value(0))); assertEquals(15000, i.value(1), 0); }
Example 15
Source File: TSBF.java From tsml with GNU General Public License v3.0 | 4 votes |
private void buildFirstClassificationProblem(Instances data, Instances features){ int instPos=0; // System.out.println(" Number of subsequences ="+numSubSeries+"number of intervals per subsequence ="+numIntervals+" number of cases ="+data.numInstances()+" new number of cases ="+features.numInstances()); for(int k=0;k<data.numInstances();k++){// Instance ins:data){ double[] series=data.instance(k).toDoubleArray(); // if(k==0) // System.out.println("INSTANCE 0="+data.instance(0)); // System.out.println(" Series length ="+(series.length-1)); for(int i=0;i<numSubSeries;i++){ int pos=0; // if(k==0) // System.out.println(" Setting subseries "+i+" ["+subSeries[i][0]+","+subSeries[i][1]+"]"); //Get whole subseries instance subseries features Instance newIns=features.get(instPos++); FeatureSet f=new FeatureSet(); f.setFeatures(series,subSeries[i][0], subSeries[i][1]); // if(k==0) // System.out.println("New num features ="+newIns.numAttributes()+" Whole subsequence features ="+f); newIns.setValue(pos++,f.mean); newIns.setValue(pos++,f.stDev); newIns.setValue(pos++,f.slope); //Add start and end point newIns.setValue(pos++,subSeries[i][0]); newIns.setValue(pos++,subSeries[i][1]); //Get interval features for(int j=0;j<numIntervals;j++){ // if(k==0) // System.out.println(" Setting interval "+j+" ["+intervals[i][j][0]+","+intervals[i][j][1]+"]"); f.setFeatures(series, intervals[i][j][0],intervals[i][j][1]); newIns.setValue(pos++,f.mean); newIns.setValue(pos++,f.stDev); newIns.setValue(pos++,f.slope); } } } if(InstanceTools.hasMissing(features)){ System.out.println(" MISSING A VALUE"); for(int i=0;i<features.numInstances();i++){ if(features.instance(i).hasMissingValue()){ System.out.println("Instance ="+features.instance(i)+" from original instance "+i/numSubSeries+" ::"+data.instance(i/numSubSeries)); System.out.println("\tSubsequence = ["+subSeries[i%numSubSeries][0]+","+subSeries[i%numSubSeries][1]+"]"); for(int j=0;j<numIntervals;j++){ System.out.println("\t\t interval "+j+" 
["+intervals[i%numSubSeries][j][0]+","+intervals[i%numSubSeries][j][1]+"]"); } } } // System.out.println(" new data ="+features); System.exit(0); } }
Example 16
Source File: ARAMNetworkSparse.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances D) throws Exception { int L = D.classIndex(); int featlength = (D.numAttributes() -L)*2; int numSamples = D.numInstances(); int classlength = L * 2; if (this.order==null){ order = new ArrayList<Integer>(); for (int j=0; j<D.numInstances();j++){ order.add(j); } } if (numFeatures==-1){ initARAM( featlength,classlength ,roa , threshold ); }else{ if (featlength != numFeatures) { return ; } if (classlength != numClasses) { return ; }} // Copy the instances so we don't mess up the original data. // Function calls do not deep copy the arguments.. //Instances m_Instances = new Instances(instances); // Use the enumeration of instances to train classifier. // Do any sanity checks (e.g., missing attributes etc here // before calling updateClassifier for the actual learning for(int i=0; i<D.numInstances();i++){ Instance instance = D.get(order.get(i)); updateClassifier(instance); } System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+"."); // Alternatively, you can put the training logic within this method, // rather than updateClassifier(...). However, if you omit the // updateClassifier(...) method, you should remove // UpdateableClassifier from the class declaration above. }
Example 17
Source File: CrossValidationExperiments.java From NLIWOD with GNU Affero General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff"); BufferedReader reader = new BufferedReader(new FileReader(datapath.toString())); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(6); ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" ); int seed = 133; // Change to 100 for leave-one-out CV int folds = 10; Random rand = new Random(seed); Instances randData = new Instances(data); randData.randomize(rand); float cv_ave_f = 0; for(int n=0; n < folds; n++){ Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); //Change to the Classifier of your choice CDN Classifier = new CDN(); Classifier.buildClassifier(train); float ave_p = 0; float ave_r = 0; for(int j = 0; j < test.size(); j++){ Instance ins = test.get(j); int k = 0; for(int l=0; l < data.size(); l++){ Instance tmp = data.get(l); if(tmp.toString().equals(ins.toString())){ k = l; } } double[] confidences = Classifier.distributionForInstance(ins); int argmax = -1; double max = -1; for(int i = 0; i < 6; i++){ if(confidences[i]>max){ max = confidences[i]; argmax = i; } } String sys2ask = systems.get(systems.size() - argmax -1); ave_p += Float.parseFloat(Utils.loadSystemP(sys2ask).get(k)); ave_r += Float.parseFloat(Utils.loadSystemR(sys2ask).get(k)); } double p = ave_p/test.size(); double r = ave_r/test.size(); double fmeasure = 0; if(p>0&&r>0){fmeasure = 2*p*r/(p + r);} System.out.println("macro F on fold " + n + ": " + fmeasure); cv_ave_f += fmeasure/folds; } System.out.println("macro F average: " + cv_ave_f); System.out.println('\n'); }
Example 18
Source File: ARAMNetwork.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances D) throws Exception { int L = D.classIndex(); int featlength = (D.numAttributes() -L)*2; int numSamples = D.numInstances(); int classlength = L * 2; if (this.order==null){ order = new ArrayList<Integer>(); for (int j=0; j<D.numInstances();j++){ order.add(j); } } if (numFeatures==-1){ initARAM( featlength,classlength ,roa , threshold ); }else{ if (featlength != numFeatures) { return ; } if (classlength != numClasses) { return ; }} // Copy the instances so we don't mess up the original data. // Function calls do not deep copy the arguments.. //Instances m_Instances = new Instances(instances); // Use the enumeration of instances to train classifier. // Do any sanity checks (e.g., missing attributes etc here // before calling updateClassifier for the actual learning //Enumeration enumInsts = D.enumerateInstances(); for(int i=0; i<D.numInstances();i++){ Instance instance = D.get(order.get(i)); updateClassifier(instance); } System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+"."); // Alternatively, you can put the training logic within this method, // rather than updateClassifier(...). However, if you omit the // updateClassifier(...) method, you should remove // UpdateableClassifier from the class declaration above. }
Example 19
Source File: AutoTestAdjust.java From bestconf with Apache License 2.0 | 4 votes |
public Instances runExp(Instances samplePoints, String perfAttName){ Instances retVal = null; if(samplePoints.attribute(perfAttName) == null){ Attribute performance = new Attribute(perfAttName); samplePoints.insertAttributeAt(performance, samplePoints.numAttributes()); } int pos = samplePoints.numInstances(); int count = 0; for (int i = 0; i < pos; i++) { Instance ins = samplePoints.get(i); HashMap hm = new HashMap(); int tot = 0; for (int j = 0; j < ins.numAttributes(); j++) { hm.put(ins.attribute(j).name(), ins.value(ins.attribute(j))); } boolean testRet; if (Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) { testRet = this.startTest(hm, i, isInterrupt); double y = 0; if (!testRet) {// the setting does not work, we skip it y = -1; count++; if (count >= targetTestErrorNum) { System.out.println("There must be somthing wrong with the system. Please check and restart....."); System.exit(1); } } else { y = getPerformanceByType(performanceType); count = 0; } ins.setValue(samplePoints.numAttributes() - 1, y); writePerfstoFile(ins); } else { continue; } } retVal = samplePoints; retVal.setClassIndex(retVal.numAttributes()-1); return retVal; }
Example 20
Source File: ARAMNetworkSparseHT.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances D) throws Exception { int L = D.classIndex(); int featlength = (D.numAttributes() -L)*2; int numSamples = D.numInstances(); int classlength = L * 2; if (this.order==null){ order = new ArrayList<Integer>(); for (int j=0; j<D.numInstances();j++){ order.add(j); } } if (numFeatures==-1){ initARAM( featlength,classlength ,roa , threshold ); }else{ if (featlength != numFeatures) { return ; } if (classlength != numClasses) { return ; }} // Copy the instances so we don't mess up the original data. // Function calls do not deep copy the arguments.. //Instances m_Instances = new Instances(instances); // Use the enumeration of instances to train classifier. // Do any sanity checks (e.g., missing attributes etc here // before calling updateClassifier for the actual learning for(int i=0; i<D.numInstances();i++){ Instance instance = D.get(order.get(i)); updateClassifier(instance); } System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+"."); // Alternatively, you can put the training logic within this method, // rather than updateClassifier(...). However, if you omit the // updateClassifier(...) method, you should remove // UpdateableClassifier from the class declaration above. }