weka.core.Instances#size

Source File: CnnTextEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0

6 votes

/**
 * Walks the dataset in order and checks that, with a batch size of one, each batch produced by
 * the iterator carries a first label equal (within 1e-5) to the class value of the matching row.
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = makeData();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  final Set<Double> labels = new HashSet<>();
  int row = 0;
  while (row < data.size()) {
    // Expected value comes straight from the Weka row ...
    final double expected = data.get(row).value(data.classIndex());
    // ... and the observed one from the next batch of the iterator.
    final double observed = Utils.getNext(it).getLabels().getDouble(0);
    Assert.assertEquals(expected, observed, 1e-5);
    labels.add(expected);
    row++;
  }
}

Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0

6 votes

/**
 * Verifies that the data set iterator, run with a batch size of one, yields for every instance
 * (in dataset order) a label matching that instance's class value within 1e-5.
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = DatasetLoader.loadAngerMeta();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  final Set<Double> labels = new HashSet<>();
  for (Instance current : data) {
    final double wanted = current.value(data.classIndex());
    final DataSet batch = Utils.getNext(it);
    final double got = batch.getLabels().getDouble(0);
    Assert.assertEquals(wanted, got, 1e-5);
    labels.add(wanted);
  }
}

Source File: Dl4JMlpFilterTest.java From wekaDeeplearning4j with GNU General Public License v3.0

6 votes

/**
 * Checks that {@link Dl4jMlpFilter} reproduces the activations the classifier itself reports for
 * the given layers.
 *
 * <p>The expected activations come from {@code clf.getActivationsAtLayers}; the actual ones from
 * running a filter configured with the serialized model at {@code clfPath} over the same
 * instances. Both results must have the same number of rows, and every attribute value must
 * agree within 1e-6.
 *
 * @param clf classifier used to compute the expected activations
 * @param instances data to pass through both the classifier and the filter
 * @param transformationLayerNames names of the layers whose activations are compared
 * @param clfPath path of the serialized model file handed to the filter
 * @param useZooModel not read by this method
 * @throws Exception if computing activations or filtering fails
 */
protected void checkLayer(Dl4jMlpClassifier clf, Instances instances, String[] transformationLayerNames,
    String clfPath, boolean useZooModel) throws Exception {
  Instances activationsExpected = clf.getActivationsAtLayers(transformationLayerNames, instances);
  Dl4jMlpFilter filter = new Dl4jMlpFilter();
  // Load the MNIST III if we're being called on the MNIST dataset (dataset is in meta format (String, class))
  if (ImageInstanceIterator.isMetaArff(instances)) {
    filter.setInstanceIterator(DatasetLoader.loadMiniMnistImageIterator());
  }
  filter.setSerializedModelFile(new File(clfPath));
  filter.setTransformationLayerNames(transformationLayerNames);
  filter.setInputFormat(instances);
  filter.setPoolingType(PoolingType.NONE);

  Instances activationsActual = Filter.useFilter(instances, filter);

  // Without this check a filter output with fewer rows than expected would pass silently,
  // because the comparison loop below is bounded by the actual row count only.
  assertEquals(activationsExpected.size(), activationsActual.size());

  for (int i = 0; i < activationsActual.size(); i++) {
    Instance expected = activationsExpected.get(i);
    Instance actual = activationsActual.get(i);
    for (int j = 0; j < expected.numAttributes(); j++) {
      assertEquals(expected.value(j), actual.value(j), 1e-6);
    }
  }
}

Source File: DataSetUtils.java From AILibs with GNU Affero General Public License v3.0

6 votes

/**
 * Wraps the flattened content of an array in a single dense instance.
 *
 * <p>The instance is attached to a new, empty dataset that has one numeric attribute
 * ({@code val0}, {@code val1}, ...) per array element and a trailing nominal class attribute.
 * The class attribute offers one label per class value of {@code refInstances}, named
 * {@code "0.0"}, {@code "1.0"}, and so on. The instance's class slot is filled with {@code 0}.
 *
 * @param instance the array whose elements become the attribute values
 * @param refInstances reference dataset supplying the number of class values and the initial
 *        capacity of the new dataset
 * @return a dense instance of weight 1 holding the flattened values followed by {@code 0}
 * @throws IllegalArgumentException if either argument is {@code null}
 */
public static Instance matrixToInstance(final INDArray instance, final Instances refInstances) {
	if (instance == null || refInstances == null) {
		throw new IllegalArgumentException("Parameter 'instance' and 'refInstances' must not be null!");
	}

	// One numeric attribute per element of the array
	final ArrayList<Attribute> schema = new ArrayList<>();
	for (int pos = 0; pos < instance.length(); pos++) {
		schema.add(new Attribute("val" + pos));
	}

	// Nominal class attribute with labels "0.0", "1.0", ...
	final int numClassValues = refInstances.classAttribute().numValues();
	final List<String> classLabels = new ArrayList<>();
	for (int c = 0; c < numClassValues; c++) {
		classLabels.add(String.valueOf((double) c));
	}
	schema.add(new Attribute(CLASS_ATT_NAME, classLabels));

	final Instances header = new Instances(INSTANCES_DS_NAME, schema, refInstances.size());
	header.setClassIndex(header.numAttributes() - 1);

	// Flattened values plus a trailing 0 for the class slot
	final double[] flattened = Nd4j.toFlattened(instance).toDoubleVector();
	final double[] values = ArrayUtils.addAll(flattened, 0);
	final Instance converted = new DenseInstance(1, values);
	converted.setDataset(header);

	return converted;
}

Source File: ExactIntervalAugSpaceSampler.java From AILibs with GNU Affero General Public License v3.0

6 votes

/**
 * Draws two instances at random, collects every precise instance whose feature values all lie
 * between the two draws (bounds inclusive), and builds the augmented point from that collection.
 *
 * @return the result of {@code generateAugPoint} for the collected instances
 */
@Override
public Instance augSpaceSample() {
	Instances pool = this.getPreciseInsts();
	int poolSize = pool.size();
	ArrayList<Instance> insideBox = new ArrayList<>();

	Instance cornerA = pool.get(this.getRng().nextInt(poolSize));
	Instance cornerB = pool.get(this.getRng().nextInt(poolSize));

	// Assume last attribute is the class
	int numFeatures = pool.numAttributes() - 1;

	candidates:
	for (Instance candidate : pool) {
		for (int att = 0; att < numFeatures; att++) {
			double value = candidate.value(att);
			double lower = Math.min(cornerA.value(att), cornerB.value(att));
			double upper = Math.max(cornerA.value(att), cornerB.value(att));
			if (value < lower || value > upper) {
				// outside the interval on this feature: reject the candidate
				continue candidates;
			}
		}
		insideBox.add(candidate);
	}

	return generateAugPoint(insideBox);
}

Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0

6 votes

/**
 * With a batch size of one, the n-th batch from the iterator must carry as its first label the
 * class value of the n-th instance of the loaded dataset (tolerance 1e-5).
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = DatasetLoader.loadAngerMeta();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  final Set<Double> labels = new HashSet<>();
  for (int idx = 0; idx < data.size(); idx++) {
    final double classValue = data.get(idx).value(data.classIndex());
    final double iteratorValue = Utils.getNext(it).getLabels().getDouble(0);
    Assert.assertEquals(classValue, iteratorValue, 1e-5);
    labels.add(classValue);
  }
}

Source File: WekaInstancesTester.java From AILibs with GNU Affero General Public License v3.0

6 votes

/**
 * Loads the same dataset file twice and checks that the two wrapped copies are equal:
 * instance by instance (hash code, {@code equals}, mutual containment) and as whole datasets
 * (hash code and {@code equals}).
 *
 * @throws Exception if the dataset file cannot be read
 */
@Test
public void testEqualnessOfTwoCopiesOfSameDataset() throws Exception {
	/* the readers are closed as soon as the data has been read so that no file handle leaks */
	Instances ds1;
	try (FileReader firstReader = new FileReader(this.dataset)) {
		ds1 = new Instances(firstReader);
	}
	ds1.setClassIndex(ds1.numAttributes() - 1);
	WekaInstances wrapped1 = new WekaInstances(ds1);

	Instances ds2;
	try (FileReader secondReader = new FileReader(this.dataset)) {
		ds2 = new Instances(secondReader);
	}
	ds2.setClassIndex(ds2.numAttributes() - 1);
	WekaInstances wrapped2 = new WekaInstances(ds2);

	/* first conduct an instance-wise comparison and a mutual containment check */
	int n = ds1.size();
	assertEquals("Copy of dataset has different length than the original.", n, ds2.size());
	for (int i = 0; i < n; i++) {
		IWekaInstance i1 = wrapped1.get(i);
		IWekaInstance i2 = wrapped2.get(i);
		assertEquals("Hash codes of single instance don't match!", i1.hashCode(), i2.hashCode());
		assertEquals("Comparing the instances with equals yields false.", i1, i2);
		assertTrue("The second dataset does not contain " + i1 + ", which is contained in the first.", wrapped2.contains(i1));
		assertTrue("The first dataset does not contain " + i2 + ", which is contained in the second.", wrapped1.contains(i2));
	}

	/* now compare the entire dataset */
	assertEquals("Hash codes of entire dataset don't match!", wrapped1.hashCode(), wrapped2.hashCode());
	assertEquals("Comparing the datasets with equals yields false.", wrapped1, wrapped2);
}

Source File: RLTunedKNNSetup.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Re-initialises the tuning state for the given training data and installs a new agent and a
 * single-model ensembler on {@code rlTunedClassifier}.
 *
 * @param trainData the training data; passed to the parameter space builder and used to size
 *                  the full neighbourhood
 */
@Override
public void accept(Instances trainData) {
    neighbourCount = new Box<>(1); // must start at 1 otherwise the loocv produces no train estimate
    paramCount = new Box<>(0);
    longestExploreTimeNanos = 0;
    id = 0;
    longestExploitTimeNanos = 0;
    // start from empty benchmark sets; switchImproveableBenchmarks() is defined elsewhere and
    // runs only after all three sets exist
    nextImproveableBenchmarks = new HashSet<>();
    improveableBenchmarks = new HashSet<>();
    unimproveableBenchmarks = new HashSet<>();
    switchImproveableBenchmarks();
    // NOTE(review): presumably a soft limit of 1 keeps only the best-scoring entries — confirm in PrunedMultimap
    finalBenchmarks = PrunedMultimap.desc(ArrayList::new);
    finalBenchmarks.setSoftLimit(1);
    final int seed = rlTunedClassifier.getSeed();
    // parameter space is built from the training data and iterated in seeded random order
    paramSpace = paramSpaceBuilder.apply(trainData);
    paramSetIterator = new RandomListIterator<>(this.paramSpace, seed).setRemovedOnNext(true);
    fullParamSpaceSize = this.paramSpace.size();
    fullNeighbourhoodSize = trainData.size(); // todo check all seeds set
    // the effective limits are derived from the full sizes and the configured limit / percentage
    maxNeighbourhoodSize = findLimit(fullNeighbourhoodSize, neighbourhoodSizeLimit, neighbourhoodSizeLimitPercentage);
    maxParamSpaceSize = findLimit(fullParamSpaceSize, paramSpaceSizeLimit, paramSpaceSizeLimitPercentage);
    // outside incremental mode the neighbour count starts at its maximum rather than at 1
    if(!incrementalMode) {
        neighbourCount.set(maxNeighbourhoodSize);
    }
    // transform classifiers into benchmarks
    explorer = new ParamExplorer();
    // setup an iterator to improve benchmarks
    exploiter = new NeighbourExploiter();
    stategy = new LeeStategy();
    agent = new KnnAgent();
    // set corresponding iterators in the incremental tuned classifier
    rlTunedClassifier.setAgent(agent);
    rlTunedClassifier.setEnsembler(Ensembler.single());
    // todo make sure the seeds are set for everything
}

Source File: PartitionedMultiFilter.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Tests whether each configured filter can handle the data. For every range, a header-only
 * copy of the data (plus a copy of the first row, when there is one) is cut down to that
 * range and handed to the matching filter as its input format.
 *
 * @param instanceInfo	the data to test
 * @throws Exception		if the test fails
 */
protected void testInputFormat(Instances instanceInfo) throws Exception {
  for (int idx = 0; idx < getRanges().length; idx++) {
    // structure only; seeded with one row when the data is not empty
    Instances sample = new Instances(instanceInfo, 0);
    if (instanceInfo.size() >= 1) {
      sample.add((Instance) instanceInfo.get(0).copy());
    }

    Range current = getRanges()[idx];
    current.setUpper(instanceInfo.numAttributes() - 1);

    getFilters()[idx].setInputFormat(generateSubset(sample, current));
  }
}

Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Collects the class value of every instance, in dataset order.
 *
 * @param inst the dataset to read
 * @return an array with one class value per instance
 */
public static double[] getClassesAsArray(final Instances inst) {
	return inst.stream().mapToDouble(row -> row.classValue()).toArray();
}

Source File: RankingByPairwiseComparison.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Trains one binary classifier per unordered pair of label attributes.
 *
 * <p>For each pair (a, b) a copy of the filtered dataset is made, its last attribute is set to
 * {@code "true"} for rows where the value of a exceeds the value of b in the original dataset and
 * to {@code "false"} otherwise, and a new base learner is built on that copy.
 *
 * @param dataset the training data containing the label attributes
 * @param labels passed on to {@code getLabelIndices} to determine the label attributes
 * @throws Exception if determining the label indices or filtering fails; failures while building
 *         the pairwise classifiers are wrapped in a {@code TrainingException}
 */
public void fit(final Instances dataset, final int labels) throws Exception {
	this.labelIndices = getLabelIndices(labels, dataset);
	this.labelIndices.forEach(x -> this.labelSet.add(dataset.attribute(x).name()));
	Instances plainPWDataset = this.applyFiltersToDataset(dataset);

	try {
		final int numLabels = this.labelIndices.size();
		for (int i = 0; i < numLabels - 1; i++) {
			for (int j = i + 1; j < numLabels; j++) {
				PairWiseClassifier pwc = new PairWiseClassifier();
				final int first = this.labelIndices.get(i);
				pwc.a = dataset.attribute(first).name();
				final int second = this.labelIndices.get(j);
				pwc.b = dataset.attribute(second).name();
				pwc.c = AbstractClassifier.forName(this.config.getBaseLearner(), null);

				// every pair gets its own copy so that the target column can be overwritten
				Instances pwDataset = new Instances(plainPWDataset);
				for (int row = 0; row < pwDataset.size(); row++) {
					boolean firstIsLarger = dataset.get(row).value(first) > dataset.get(row).value(second);
					String target = firstIsLarger ? "true" : "false";
					pwDataset.get(row).setValue(pwDataset.numAttributes() - 1, target);
				}
				pwDataset.setClassIndex(pwDataset.numAttributes() - 1);

				pwc.c.buildClassifier(pwDataset);
				this.pwClassifiers.add(pwc);
			}
		}
	} catch (Exception e) {
		throw new TrainingException("Could not build ranker", e);
	}
}

Source File: RandomStratifiedIndexSampler.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Replaces the data this sampler works on: rebuilds the per-class index lists and both
 * distribution fields, sets {@code count} back to zero and {@code maxCount} to the number of
 * instances.
 *
 * @param instances the data to sample from
 */
public void setInstances(Instances instances) {
    this.instancesByClass = indexByClass(instances);
    // the two distribution fields are filled by two separate classDistribution calls
    this.classDistribution = classDistribution(instances);
    this.classSamplingProbabilities = classDistribution(instances);
    this.count = 0;
    this.maxCount = instances.size();
}

Source File: RandomStratifiedSampler.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Replaces the data this sampler works on: regroups the instances by class, recomputes both
 * distribution fields, sets {@code count} back to zero and {@code maxCount} to the number of
 * instances.
 *
 * @param instances the data to sample from
 */
public void setInstances(Instances instances) {
    this.instancesByClass = instancesByClass(instances);
    // the two distribution fields are filled by two separate classDistribution calls
    this.classDistribution = classDistribution(instances);
    this.classSamplingProbabilities = classDistribution(instances);
    this.count = 0;
    this.maxCount = instances.size();
}

Source File: InstanceTools.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Groups the positions of the instances by their class value.
 *
 * <p>The class count is taken from the dataset itself, so an empty dataset yields one empty
 * list per class instead of failing on a lookup of the first instance.
 *
 * @param instances the dataset to index; its class attribute must be set
 * @return a list with one entry per class; entry {@code c} holds, in ascending order, the
 *         positions of all instances whose class value (cast to {@code int}) is {@code c}
 */
public static List<List<Integer>> indexByClass(Instances instances) {
    final int numClasses = instances.numClasses();
    final List<List<Integer>> instancesByClass = new ArrayList<>(numClasses);
    for(int i = 0; i < numClasses; i++) {
        instancesByClass.add(new ArrayList<>());
    }
    for(int i = 0; i < instances.size(); i++) {
        instancesByClass.get((int) instances.get(i).classValue()).add(i);
    }
    return instancesByClass;
}

Source File: MLPipeline.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Classifies every instance of the given dataset, in order.
 *
 * @param arg0 the instances to classify
 * @return one prediction per instance, at the same position as the instance
 * @throws Exception if classifying any single instance fails
 */
public double[] classifyInstances(final Instances arg0) throws Exception {
	final int total = arg0.size();
	final double[] predictions = new double[total];
	int position = 0;
	while (position < total) {
		predictions[position] = this.classifyInstance(arg0.get(position));
		position++;
	}
	return predictions;
}

Source File: TableMaker.java From NLIWOD with GNU Affero General Public License v3.0

4 votes

/**
 * Trains a classifier on the QALD-6 log data and prints one table row per instance.
 *
 * <p>Each row lists the instance index, the question text, the F-measure of every system, the
 * best of those F-measures, and the F-measure of the system picked from the classifier's
 * confidences. The confidence vector is printed before each row. After all rows, the per-system
 * averages (last slot: average of the best F-measure) and the average F-measure of the picked
 * system are printed.
 *
 * @param args not used
 * @throws Exception if reading the data, training or prediction fails
 */
public static void main(String[] args) throws Exception {
	Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff");
	Instances data;
	// The reader is only needed while the ARFF file is parsed; close it right afterwards.
	try (BufferedReader reader = new BufferedReader(new FileReader(datapath.toString()))) {
		ArffReader arff = new ArffReader(reader);
		data = arff.getData();
	}
	data.setClassIndex(6);

	//Change To Classifier of Choice
	PSt classifier = new PSt();
	classifier.buildClassifier(data);

	// Collect the first question string of every test question.
	JSONObject qald6test = Utils.loadTestQuestions();
	JSONArray questions = (JSONArray) qald6test.get("questions");
	ArrayList<String> testQuestions = Lists.newArrayList();
	for(int i = 0; i < questions.size(); i++){
		JSONObject questionData = (JSONObject) questions.get(i);
		JSONArray questionStrings = (JSONArray) questionData.get("question");
		JSONObject questionEnglish = (JSONObject) questionStrings.get(0);
		testQuestions.add((String) questionEnglish.get("string"));
	}

	ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" );
	double avef = 0;
	// slots 0-5: per-system average F-measure, slot 6: average of the best F-measure per instance
	double[] systemavef = {0,0,0,0,0,0,0};
	for(int i=0; i<data.size(); i++){
		String tmp = "";
		tmp += i +"\t &" + testQuestions.get(i);
		double bestf = 0;
		for(String system: systems){
			double p = Float.parseFloat(Utils.loadSystemP(system).get(i));
			double r = Float.parseFloat(Utils.loadSystemR(system).get(i));
			double f = 0;
			// F-measure is left at 0 when both p and r are 0 (avoids 0/0)
			if(!(p==0&&r==0)){
				f = 2*p*r/(p+r);
			}
			if(f > bestf){
				bestf = f;
			}
			tmp += "\t &" + Math.floor(f * 100) / 100;
			systemavef[systems.indexOf(system)] += f/data.size();
		}
		systemavef[6] += bestf/data.size();
		tmp += "\t &" + Math.floor(bestf * 100) / 100;

		// Pick the index with the highest confidence among the first six entries.
		double[] confidences = classifier.distributionForInstance(data.get(i));
		System.out.println(Arrays.toString(confidences));
		int argmax = -1;
		double max = -1;
		for(int j = 0; j < 6; j++){
			if(confidences[j]>max){
				max = confidences[j];
				argmax = j;
			}
		}

		// NOTE(review): the confidence index is mapped to the system list in reverse order —
		// presumably the label attributes are stored in reverse; confirm against the ARFF file.
		String sys2ask = systems.get(systems.size() - argmax -1);
		double systemp = Float.parseFloat(Utils.loadSystemP(sys2ask).get(i));
		double systemr = Float.parseFloat(Utils.loadSystemR(sys2ask).get(i));
		double systemf = 0;
		if(!(systemp==0&&systemr==0)){
			systemf = 2*systemp*systemr/(systemp+systemr);
		}
		avef += systemf;
		tmp += "\t &" + Math.floor(systemf * 100) / 100;

		tmp += "\\\\";
		System.out.println(tmp);
	}
	System.out.println(Arrays.toString(systemavef));
	System.out.println(avef/data.size());
}

Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Computes which share of the dataset is counted by {@code getNumberOfInstancesFromClass} for
 * the given classes.
 *
 * <p>The division is carried out in double precision so that the result is not rounded to
 * single precision before being returned as a {@code double}.
 *
 * @param data the dataset to inspect
 * @param cs the classes of interest
 * @return the count for the given classes divided by the size of the dataset
 */
public static double getRelativeNumberOfInstancesFromClass(final Instances data, final Collection<String> cs) {
	return getNumberOfInstancesFromClass(data, cs) / (double) data.size();
}

Source File: SequenceStatsCache.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Builds the cache for the given training data.
 *
 * <p>All attributes except the last one are treated as the values of a sequence. For every
 * sequence the constructor records the minimum and maximum value, the positions where they
 * first occur, whether those positions are the first or last of the sequence, and the positions
 * sorted by descending absolute value. The envelope arrays are allocated per sequence, and the
 * "last window computed" markers start at -1.
 *
 * @param train the training data
 * @param startingWindow the initial value of the current window
 */
public SequenceStatsCache(final Instances train, final int startingWindow) {
    this.train = train;
    final int count = train.size();
    final int seriesLength = train.numAttributes() - 1;
    final int lastPosition = seriesLength - 1;

    this.LEs = new ArrayList<>(count);
    this.UEs = new ArrayList<>(count);

    // -1 marks "nothing computed yet" for each of the window trackers
    this.lastWindowComputed = new double[count];
    this.lastERPWindowComputed = new double[count];
    this.lastLCSSWindowComputed = new double[count];
    Arrays.fill(this.lastWindowComputed, -1);
    Arrays.fill(this.lastERPWindowComputed, -1);
    Arrays.fill(this.lastLCSSWindowComputed, -1);
    this.currentWindow = startingWindow;

    this.mins = new double[count];
    this.maxs = new double[count];
    this.indexMins = new int[count];
    this.indexMaxs = new int[count];
    this.isMinFirst = new boolean[count];
    this.isMinLast = new boolean[count];
    this.isMaxFirst = new boolean[count];
    this.isMaxLast = new boolean[count];
    this.indicesSortedByAbsoluteValue = new IndexedDouble[count][seriesLength];

    for (int seq = 0; seq < count; seq++) {
        double lowest = Double.POSITIVE_INFINITY;
        double highest = Double.NEGATIVE_INFINITY;
        int lowestAt = -1;
        int highestAt = -1;
        for (int pos = 0; pos < seriesLength; pos++) {
            final double val = train.get(seq).value(pos);
            if (val > highest) {
                highest = val;
                highestAt = pos;
            }
            if (val < lowest) {
                lowest = val;
                lowestAt = pos;
            }
            this.indicesSortedByAbsoluteValue[seq][pos] = new IndexedDouble(pos, Math.abs(val));
        }

        this.indexMaxs[seq] = highestAt;
        this.indexMins[seq] = lowestAt;
        this.mins[seq] = lowest;
        this.maxs[seq] = highest;
        this.isMinFirst[seq] = (lowestAt == 0);
        this.isMinLast[seq] = (lowestAt == lastPosition);
        this.isMaxFirst[seq] = (highestAt == 0);
        this.isMaxLast[seq] = (highestAt == lastPosition);

        // largest absolute value first
        Arrays.sort(this.indicesSortedByAbsoluteValue[seq], (a, b) -> Double.compare(b.value, a.value));
        this.LEs.add(new double[seriesLength]);
        this.UEs.add(new double[seriesLength]);
    }
}

Source File: UnsupervisedShapelets.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Fills {@code shapelets} by repeatedly picking, from one instance, the candidate shapelet with
 * the highest gap value.
 *
 * <p>Candidates of length 25 and 50 are used (a single length of half the attribute count when
 * there are fewer than 50 attributes). After each pick, the instance furthest from the picked
 * shapelet becomes the next source instance and the data is reduced to the instances whose
 * distance is at least mean + standard deviation of the distances below the shapelet's
 * {@code dt}. The search stops when exactly one distance lies below {@code dt} or when at most
 * one instance remains.
 *
 * @param data the instances to extract shapelets from
 */
private void extractUShapelets(Instances data){
    int[] shapeletLengths = {25, 50};

    if (data.numAttributes() < 50){
        shapeletLengths = new int[]{data.numAttributes()/2};
    }

    shapelets = new ArrayList<>();
    numInstances = data.size();
    Instance inst = data.firstInstance();
    boolean finished = false;

    while (!finished){
        ArrayList<UShapelet> shapeletCandidates = new ArrayList<>();

        //Finds all candidate shapelets on the selected instance
        for (int i = 0; i < shapeletLengths.length; i++){
            for (int n = 0; n < inst.numAttributes() - shapeletLengths[i]; n++){
                UShapelet candidate = new UShapelet(n, shapeletLengths[i], inst);
                candidate.computeGap(data);
                shapeletCandidates.add(candidate);
            }
        }

        double maxGap = -1;
        int maxGapIndex = -1;

        //Finds the shapelet with the highest gap value
        for (int i = 0; i < shapeletCandidates.size(); i++){
            if (shapeletCandidates.get(i).gap > maxGap){
                maxGap = shapeletCandidates.get(i).gap;
                maxGapIndex = i;
            }
        }

        //Adds the shapelet with the best gap value to the pool of shapelets
        UShapelet best = shapeletCandidates.get(maxGapIndex);
        shapelets.add(best);

        double[] distances = best.computeDistances(data);
        ArrayList<Double> lesserDists = new ArrayList<>();
        double maxDist = -1;
        int maxDistIndex = -1;

        //Finds the instance with the max dist to the shapelet and all with a dist lower than the distance used
        //to generate the gap value.
        for (int i = 0; i < distances.length; i++){
            if (distances[i] < best.dt){
                lesserDists.add(distances[i]);
            }
            else if (distances[i] > maxDist){
                maxDist = distances[i];
                maxDistIndex = i;
            }
        }

        //Use max dist instance to generate new shapelet and remove low distance instances
        if (lesserDists.size() == 1){
            finished = true;
        }
        else{
            inst = data.get(maxDistIndex);

            double mean = mean(lesserDists);
            double cutoff = mean + standardDeviation(lesserDists, mean);

            Instances newData = new Instances(data, 0);

            for (int i = 0; i < data.numInstances(); i++){
                if (distances[i] >= cutoff){
                    newData.add(data.get(i));
                }
            }

            data = newData;

            //Also stop when nothing is left: another pass over an empty dataset finds no
            //max dist instance and would end in data.get(-1).
            if (data.size() <= 1){
                finished = true;
            }
        }
    }
}

Source File: CrossValidationExperiments.java From NLIWOD with GNU Affero General Public License v3.0

4 votes

/**
 * Runs a seeded, shuffled 10-fold cross-validation on the QALD-6 log data and prints the macro
 * F-measure of every fold followed by their average.
 *
 * <p>In each fold a classifier is trained on the training split. For every test instance the
 * system picked from the classifier's confidences is looked up, and that system's precision and
 * recall for the instance are accumulated. The fold's F-measure is computed from the averaged
 * precision and recall.
 *
 * @param args not used
 * @throws Exception if reading the data, training or prediction fails
 */
public static void main(String[] args) throws Exception {

	Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff");
	Instances data;
	// The reader is only needed while the ARFF file is parsed; close it right afterwards.
	try (BufferedReader reader = new BufferedReader(new FileReader(datapath.toString()))) {
		ArffReader arff = new ArffReader(reader);
		data = arff.getData();
	}
	data.setClassIndex(6);

	ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" );


	int seed = 133;
	// Change to 100 for leave-one-out CV
	int folds = 10;

	Random rand = new Random(seed);
	Instances randData = new Instances(data);
	randData.randomize(rand);

	float cv_ave_f = 0;

	for(int n=0; n < folds; n++){
		Instances train = randData.trainCV(folds,  n);
		Instances test = randData.testCV(folds,  n);

		//Change to the Classifier of your choice
		CDN classifier = new CDN();
		classifier.buildClassifier(train);


		float ave_p = 0;
		float ave_r = 0;

		for(int j = 0; j < test.size(); j++){
			Instance ins = test.get(j);
			// Find the position of this instance in the unshuffled data by comparing string
			// forms; the last match wins, 0 is used when nothing matches.
			final String insText = ins.toString();
			int k = 0;
			for(int l=0; l < data.size(); l++){
				Instance tmp = data.get(l);
				if(tmp.toString().equals(insText)){
					k = l;
				}
			}
			// Pick the index with the highest confidence among the first six entries.
			double[] confidences = classifier.distributionForInstance(ins);
			int argmax = -1;
			double max = -1;
			for(int i = 0; i < 6; i++){
				if(confidences[i]>max){
					max = confidences[i];
					argmax = i;
				}
			}
			// NOTE(review): the confidence index is mapped to the system list in reverse order —
			// presumably the label attributes are stored in reverse; confirm against the ARFF file.
			String sys2ask = systems.get(systems.size() - argmax -1);
			ave_p += Float.parseFloat(Utils.loadSystemP(sys2ask).get(k));
			ave_r += Float.parseFloat(Utils.loadSystemR(sys2ask).get(k));
		}

		double p = ave_p/test.size();
		double r = ave_r/test.size();
		double fmeasure = 0;
		if(p>0&&r>0){fmeasure = 2*p*r/(p + r);}
		System.out.println("macro F on fold " + n + ": " + fmeasure);

		cv_ave_f += fmeasure/folds;

	}
	System.out.println("macro F average: " + cv_ave_f);
	System.out.println('\n');
}

Java Code Examples for weka.core.Instances#size()