weka.core.Instances#add

Source File: DataSetUtilsTest.java From AILibs with GNU Affero General Public License v3.0

6 votes

/**
 * Builds a single instance with {@code 32 * 32 * 3 + 1} attributes, all set to 1, and checks
 * that {@code DataSetUtils.cifar10InstanceToMatrix} turns it into a matrix of shape
 * {@code [32, 32, 3]}.
 */
public void cifar10InstancesAttributesTest() {
    final int attributeCount = 32 * 32 * 3 + 1;
    ArrayList<Attribute> attributes = new ArrayList<>();
    for (int idx = 0; idx < attributeCount; idx++) {
        attributes.add(new Attribute("blub" + idx));
    }
    Instances dataset = new Instances("test", attributes, 1);

    // The only instance of the dataset carries the value 1 in every attribute.
    DenseInstance inst = new DenseInstance(attributes.size());
    int position = 0;
    while (position < inst.numAttributes()) {
        inst.setValue(position, 1d);
        position++;
    }
    inst.setDataset(dataset);
    dataset.add(inst);

    INDArray matrix = DataSetUtils.cifar10InstanceToMatrix(inst);
    Assert.assertArrayEquals(new long[]{32, 32, 3}, matrix.shape());
}

Source File: AbstractEnsemble.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Collects the class distribution that every module's classifier produces for one instance.
 * If a transform is configured, the instance is first wrapped in a one-element dataset that
 * shares its header, run through the transform, and the transformed instance is classified
 * instead.
 *
 * @param instance the instance to classify; its dataset is read when a transform is set
 * @return the distributions of each individual module, i.e [0] = first module's dist, [1] = second...
 * @throws Exception if the transform or any of the classifiers fails
 */
public double[][] distributionForInstanceByConstituents(Instance instance) throws Exception{
    Instance toClassify = instance;
    if (this.transform != null) {
        // the transform operates on whole datasets, hence the single-instance container
        Instances single = new Instances(instance.dataset(), 0);
        single.add(instance);
        toClassify = transform.process(single).instance(0);
    }

    double[][] dists = new double[this.modules.length][];
    int m = 0;
    while (m < modules.length) {
        dists[m] = modules[m].getClassifier().distributionForInstance(toClassify);
        m++;
    }
    return dists;
}

Source File: BestConf.java From bestconf with Apache License 2.0

5 votes

/**
 * Manual end-to-end run of COMT2: loads the training set, draws LHS sample points, builds and
 * evaluates the model on the training set, saves the instance returned by
 * {@code getInstanceWithPossibleMaxY}, and finally saves the training set with the class value
 * replaced by the model's prediction.
 *
 * @throws Exception if loading, training, evaluation or saving fails
 */
public static void testCOMT2() throws Exception{
	BestConf bestconf = new BestConf();
	Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
	trainingSet.setClassIndex(trainingSet.numAttributes()-1);

	// sample points get the training set's class attribute appended as their last (class) attribute
	Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(), InitialSampleSetSize, false);
	samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
	samplePoints.setClassIndex(samplePoints.numAttributes()-1);

	COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
	comt.buildClassifier(trainingSet);

	Evaluation eval = new Evaluation(trainingSet);
	eval.evaluateModel(comt, trainingSet);
	System.err.println(eval.toSummaryString());

	Instances bestInstances = new Instances(trainingSet,2);
	bestInstances.add(comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance()));
	DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

	// now we output the training set with the class value updated as the predicted value
	final int rowCount = trainingSet.numInstances();
	Instances output = new Instances(trainingSet, rowCount);
	for (int row = 0; row < rowCount; row++) {
		Instance ins = trainingSet.instance(row);
		double[] values = ins.toDoubleArray();
		values[values.length-1] = comt.classifyInstance(ins);
		output.add(ins.copy(values));
	}
	DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}

Source File: KDTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Returns the k nearest neighbours of the supplied instance.
 * &gt;k neighbours are returned if there are more than one
 * neighbours at the kth boundary.
 * <p>
 * As a side effect the distances of the returned neighbours are stored in
 * {@code m_DistanceList} (after post-processing by the distance function), in the
 * same order as the returned instances.
 *
 * @param target	The instance to find the nearest neighbours for.
 * @param k 		The number of neighbours to find.
 * @return The k nearest neighbours (or &gt;k if more there are than
 * one neighbours at the kth boundary).
 * @throws Exception 	if the nearest neighbour could not be found.
 */
public Instances kNearestNeighbours(Instance target, int k) throws Exception {
  checkMissing(target);

  if (m_Stats != null) {
    m_Stats.searchStart();
  }

  MyHeap heap = new MyHeap(k);
  findNearestNeighbours(target, m_Root, k, heap, 0.0);

  if (m_Stats != null) {
    m_Stats.searchFinish();
  }

  final int total = heap.size() + heap.noOfKthNearest();
  Instances neighbours = new Instances(m_Instances, total);
  m_DistanceList = new double[total];
  int[] indices = new int[total];

  // Results are written from the last slot backwards: first the elements tied at
  // the kth boundary, then whatever remains on the heap.
  int slot = total - 1;
  for (; heap.noOfKthNearest() > 0; slot--) {
    MyHeapElement tied = heap.getKthNearest();
    indices[slot] = tied.index;
    m_DistanceList[slot] = tied.distance;
  }
  for (; heap.size() > 0; slot--) {
    MyHeapElement top = heap.get();
    indices[slot] = top.index;
    m_DistanceList[slot] = top.distance;
  }
  m_DistanceFunction.postProcessDistances(m_DistanceList);

  for (int instanceIndex : indices) {
    neighbours.add(m_Instances.instance(instanceIndex));
  }

  return neighbours;
}

Source File: ZooModelTest.java From wekaDeeplearning4j with GNU General Public License v3.0

5 votes

/**
 * Builds a dataset named "shrinked" from the first 10 instances of {@code data}, using the
 * same attributes and class index 1. Each picked instance gets its row number as class value.
 * <p>
 * Note that the instances obtained from {@code data} are modified directly (class value and
 * dataset reference) before being added, and that fewer than 10 input instances make
 * {@code data.get(i)} fail.
 *
 * @param data the dataset to take the attributes and the first 10 instances from
 * @return the new 10-instance dataset
 */
private Instances shrinkInstances(Instances data) {
    final int keep = 10;

    ArrayList<Attribute> header = new ArrayList<>();
    int attIdx = 0;
    while (attIdx < data.numAttributes()) {
        header.add(data.attribute(attIdx));
        attIdx++;
    }

    Instances shrunkenData = new Instances("shrinked", header, keep);
    shrunkenData.setClassIndex(1);

    for (int row = 0; row < keep; row++) {
        Instance picked = data.get(row);
        picked.setClassValue(row % keep);
        picked.setDataset(shrunkenData);
        shrunkenData.add(picked);
    }
    return shrunkenData;
}

Source File: FeatureGeneratorTree.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Applies this feature generator to a single instance by wrapping it in a one-element
 * dataset that shares the header of the instance's dataset and delegating to the
 * dataset-level {@code apply}.
 *
 * @param data the instance to transform; it must belong to a dataset
 * @return the first instance of the transformed one-element dataset
 * @throws PreprocessingException wrapping any exception raised while building the
 *         container or applying the generator
 */
@Override
public Instance apply(final Instance data) throws PreprocessingException {
	try {
		// Header-only copy with capacity 0: avoids copying every instance of the
		// source dataset just to clear them again right afterwards.
		Instances instances = new Instances(data.dataset(), 0);
		instances.add(data);
		return this.apply(instances).firstInstance();
	} catch (Exception e) {
		throw new PreprocessingException(e);
	}
}

Source File: Standardization.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Applies the instance-level {@code apply} to every instance of {@code data} and collects
 * the results in a new dataset that shares the header of {@code data}.
 *
 * @param data the dataset to transform; it is iterated but not modified by this method
 * @return a new dataset containing one transformed instance per input instance, in input order
 * @throws PreprocessingException if transforming any instance fails
 */
@Override
public Instances apply(final Instances data) throws PreprocessingException {
	// Header-only copy sized for the result: avoids copying all instances of the
	// input only to clear them immediately.
	Instances newInstances = new Instances(data, data.numInstances());
	for (Instance i : data) {
		newInstances.add(this.apply(i));
	}
	return newInstances;
}

Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Produces a two-class view of {@code data}: instances whose class label is in
 * {@code cluster1} are copied with class value 0.0, instances whose label is in
 * {@code cluster2} (and not in {@code cluster1}) are copied with class value 1.0, and all
 * other instances are dropped. The result container comes from
 * {@code WekaUtil.getEmptySetOfInstancesWithRefactoredClass(data)}.
 *
 * @param data the source instances
 * @param cluster1 class labels mapped to class value 0.0
 * @param cluster2 class labels mapped to class value 1.0
 * @return the dataset holding the relabeled copies
 */
public static Instances mergeClassesOfInstances(final Instances data, final Collection<String> cluster1, final Collection<String> cluster2) {
	Instances merged = WekaUtil.getEmptySetOfInstancesWithRefactoredClass(data);
	for (Instance original : data) {
		Instance relabeled = (Instance) original.copy();
		String label = original.classAttribute().value((int) Math.round(original.classValue()));

		// cluster1 takes precedence when a label appears in both collections
		if (cluster1.contains(label)) {
			relabeled.setClassValue(0.0);
		} else if (cluster2.contains(label)) {
			relabeled.setClassValue(1.0);
		} else {
			continue;
		}
		merged.add(relabeled);
	}
	return merged;
}

Source File: BinaryTransform.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Binarises every non-class attribute of {@code data} against a per-attribute split value:
 * values below the split become 0, all others 1. The class value is copied unchanged.
 * <p>
 * While {@code findNewSplits} is set, the split values are first recomputed with
 * {@code findSplitValue} from the attribute values and class values of {@code data}, and the
 * flag is then cleared so later calls reuse the stored splits.
 *
 * @param data the dataset to transform
 * @return the output-format dataset filled with one binarised instance per input instance
 * @throws Exception if determining the output format or a split value fails
 */
@Override
public Instances process(Instances data) throws Exception{
    Instances output = determineOutputFormat(data);

    if (findNewSplits) {
        splits = new double[data.numAttributes()];

        double[] classes = new double[data.numInstances()];
        for (int i = 0; i < classes.length; i++) {
            classes[i] = data.instance(i).classValue();
        }

        for (int att = 0; att < data.numAttributes(); att++) {
            if (att == data.classIndex()) {
                continue;
            }
            // gather the values of this attribute over all instances
            double[] vals = new double[data.numInstances()];
            for (int i = 0; i < data.numInstances(); i++) {
                vals[i] = data.instance(i).value(att);
            }
            splits[att] = findSplitValue(data, vals, classes);
        }
        findNewSplits = false;
    }

    // build one binarised instance per input instance
    for (int i = 0; i < data.numInstances(); i++) {
        Instance source = data.instance(i);
        Instance binarised = new DenseInstance(data.numAttributes());
        for (int att = 0; att < data.numAttributes(); att++) {
            double value;
            if (att == data.classIndex()) {
                value = source.classValue();
            } else if (source.value(att) < splits[att]) {
                value = 0;
            } else {
                value = 1;
            }
            binarised.setValue(att, value);
        }
        output.add(binarised);
    }
    return output;
}

Source File: MLSophisticatedPipeline.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Runs the instance-level {@code apply} on every instance of {@code data} and gathers the
 * results in a copy of the empty probing-result dataset.
 *
 * @param data the instances to probe
 * @return the dataset holding one probed instance per input instance, in input order
 * @throws PreprocessingException if probing an instance fails
 */
@Override
public Instances apply(final Instances data) throws PreprocessingException{
	final Instances collected = new Instances(this.getEmptyProbingResultDataset());
	for (Instance current : data) {
		Instance probed = this.apply(current);
		// attach the result header before the instance is stored
		probed.setDataset(collected);
		collected.add(probed);
	}
	return collected;
}

Source File: C45Loader.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Return the full data set. If the structure hasn't yet been determined by a
 * call to getStructure then this method does so before processing the rest of
 * the data set. The data reader is closed once all instances have been read.
 *
 * @return the structure of the data set together with all instances read from
 *         the data reader
 * @exception IOException if there is no source, if incremental retrieval is
 *              already in use, or if parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
  if (m_sourceFile == null) {
    throw new IOException("No source has been specified");
  }
  if (getRetrieval() == INCREMENTAL) {
    throw new IOException(
        "Cannot mix getting Instances in both incremental and batch modes");
  }
  setRetrieval(BATCH);
  if (m_structure == null) {
    getStructure();
  }

  StreamTokenizer tokenizer = new StreamTokenizer(m_dataReader);
  initTokenizer(tokenizer);

  Instances result = new Instances(m_structure);
  // getInstance signals the end of the data with null
  for (Instance next = getInstance(tokenizer); next != null; next = getInstance(tokenizer)) {
    result.add(next);
  }

  try {
    m_dataReader.close();
  } catch (Exception ex) {
    // a failure to close does not invalidate the data already read
    ex.printStackTrace();
  }
  return result;
}

Source File: BoTSWEnsemble.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the test bag for the given instance, turns its histogram plus class value into a
 * single dense instance with the header of {@code bagData}, and returns the SVM's class
 * distribution for it.
 *
 * @param instnc the instance to classify
 * @return the distribution returned by {@code svm} for the bag instance
 * @throws Exception if building the bag or classifying fails
 */
public double[] distributionForInstanceSVM(Instance instnc) throws Exception {
    BoTSW_Bag testBag = buildTestBag(instnc);

    // one-row container that supplies the header for the SVM
    Instances container = new Instances(bagData, 1);

    // first params.k slots hold the histogram, the last slot the class value
    final int histLength = params.k;
    double[] features = new double[histLength + 1];
    int bin = 0;
    while (bin < histLength) {
        features[bin] = testBag.hist[bin];
        ++bin;
    }
    features[features.length - 1] = testBag.classValue;

    container.add(new DenseInstance(1, features));
    return svm.distributionForInstance(container.get(0));
}

Source File: LHSInitializer.java From bestconf with Apache License 2.0

4 votes

/**
 * Generates {@code sampleSetSize} sample points over the given attributes. Several candidate
 * sample sets are produced with {@code generateOneSampleSet}; the one with the largest
 * minimum pairwise distance (per {@code minDistForSet}) decides which subdomain each point
 * uses per attribute.
 *
 * At current version, we assume all attributes are numeric attributes with bounds
 *
 * Let PACE be upper-lower DIVided by the sampleSetSize
 *
 * @param atts the attributes; each is read through its lower and upper numeric bound
 * @param sampleSetSize the number of points to generate and of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){

	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//keep the first candidate as a fallback: without it the selection stays null
			//whenever no candidate's minimum distance exceeds 0, and the generation loop
			//below would fail with a NullPointerException
			setWithMaxMinDist = setPerm;
		}
	}

	//generate and output the set with the maximum minDist as the result

	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();

		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
		for(int j=1;j<sampleSetSize;j++){
			bounds[i][j] = bounds[i][j-1] + pace;
		}
	}

	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain chosen for point i in dimension j
			int cell = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][cell];
			double upper = bounds[j][cell+1];
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}

	//third, return the generated points
	return data;
}

Source File: UnionFilter.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Unites two data sets instance by instance.
 * <p>
 * If either data set has no intermediate instances, the Weka instances are merged: all
 * attributes except the last one of each data set are copied (renamed with the suffix "u1"
 * or "u2"), followed by a new nominal class attribute "classAtt"; each united instance takes
 * its class value from the first data set. Otherwise the intermediate instances are merged by
 * flattening and horizontally stacking the i-th arrays of both data sets, and the Weka
 * instances of the first data set are kept.
 *
 * @param coll1 the first data set, must not be null
 * @param coll2 the second data set, must not be null
 * @return the united data set
 * @throws IllegalArgumentException if a parameter is null, if the Weka data sets differ in
 *         their number of instances, or if a present intermediate-instance list is empty
 */
static DataSet union(final DataSet coll1, final DataSet coll2) {
	if (coll1 == null || coll2 == null) {
		throw new IllegalArgumentException("Parameters 'coll1' and 'coll2' must not be null!");
	}

	if (coll1.getIntermediateInstances() == null || coll2.getIntermediateInstances() == null) {
		// Merge Weka instances
		Instances instances1 = coll1.getInstances();
		Instances instances2 = coll2.getInstances();

		if (instances1.numInstances() != instances2.numInstances()) {
			throw new IllegalArgumentException("Data sets to be united must have the same amount of instances!");
		}

		// Both feature sets without their last attribute, plus one shared class attribute
		ArrayList<Attribute> attributes = new ArrayList<>(
				coll1.getInstances().numAttributes() + coll2.getInstances().numAttributes() - 1);
		for (int i = 0; i < instances1.numAttributes() - 1; i++) {
			attributes.add(instances1.attribute(i).copy(instances1.attribute(i).name() + "u1"));
		}
		for (int i = 0; i < instances2.numAttributes() - 1; i++) {
			attributes.add(instances2.attribute(i).copy(instances2.attribute(i).name() + "u2"));
		}

		// Add class attribute; its labels are the class indices rendered as doubles ("0.0", "1.0", ...)
		List<String> classValues = IntStream.range(0, instances1.classAttribute().numValues()).asDoubleStream()
				.mapToObj(String::valueOf).collect(Collectors.toList());
		Attribute classAtt = new Attribute("classAtt", classValues);
		attributes.add(classAtt);

		Instances unitedInstances = new Instances("UnitedInstances", attributes, instances1.numInstances());
		unitedInstances.setClassIndex(unitedInstances.numAttributes() - 1);

		for (int i = 0; i < instances1.numInstances(); i++) {
			Instance first = instances1.get(i);
			Instance second = instances2.get(i);

			Instance instance = new DenseInstance(attributes.size());
			instance.setDataset(unitedInstances);

			// Copy values
			int runningIndex = 0;
			for (int j = 0; j < instances1.numAttributes() - 1; j++) {
				instance.setValue(runningIndex++, first.value(j));
			}
			for (int j = 0; j < instances2.numAttributes() - 1; j++) {
				instance.setValue(runningIndex++, second.value(j));
			}
			instance.setClassValue(first.classValue());

			unitedInstances.add(instance);
		}

		return new DataSet(unitedInstances, null);
	} else {
		if (coll1.getIntermediateInstances().isEmpty() || coll2.getIntermediateInstances().isEmpty()) {
			throw new IllegalArgumentException("There must be intermediate instances if the collection is set.");
		}

		// Merge intermediate instances
		List<INDArray> intermediateInsts1 = coll1.getIntermediateInstances();
		List<INDArray> intermediateInsts2 = coll2.getIntermediateInstances();

		// The result holds one stacked array per instance, so it is sized by the number of
		// instances; the combined element count of two arrays would over-allocate the list.
		List<INDArray> unitedIntermediateInsts = new ArrayList<>(intermediateInsts1.size());
		for (int i = 0; i < intermediateInsts1.size(); i++) {
			INDArray intermediateInst = Nd4j.hstack(intermediateInsts1.get(i).ravel(),
					intermediateInsts2.get(i).ravel());
			unitedIntermediateInsts.add(intermediateInst);
		}

		return new DataSet(coll1.getInstances(), unitedIntermediateInsts);
	}
}

Source File: DecisionTreeEstimator.java From jMetal with MIT License

4 votes

/**
 * Predicts a value for one variable of {@code testSolution} with a J48 decision tree.
 * <p>
 * A training dataset with three attributes (numeric, nominal, string) is built from
 * {@code solutionList}, with one row per (solution, variable index) pair. A test dataset
 * with the same attributes is built from {@code testSolution}. The training data is passed
 * through a {@code StringToWordVector} filter, a J48 classifier is trained with class
 * index 1, and the classifier's output for the test row at {@code index} is returned.
 * <p>
 * If anything in this procedure throws, the value of the test solution's variable at
 * {@code index} is returned instead.
 *
 * @param index the index of the test row (and fallback variable) to predict
 * @param testSolution the solution to predict for; it is cast to {@code DoubleSolution}
 * @return the classifier's output for the selected test row, or the variable's own value
 *         if the procedure failed
 */
public double doPredictionVariable(int index,S testSolution) {
  double result = 0.0d;

  try {
    // the number of variables is taken from the first solution of the list
    int numberOfVariables = solutionList.get(0).getNumberOfVariables();
    //Attributes
    //numeric
    Attribute attr = new Attribute("my-numeric");

    //nominal: one label per variable index
    ArrayList<String> myNomVals = new ArrayList<>();

    for (int i=0; i<numberOfVariables; i++)
      myNomVals.add(VALUE_STRING+i);
    Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals);

    //string
    Attribute attr2 = new Attribute(MY_STRING, (List<String>)null);

    //2.create dataset
    ArrayList<Attribute> attrs = new ArrayList<>();
    attrs.add(attr);
    attrs.add(attr1);
    attrs.add(attr2);
    Instances dataset = new Instances("my_dataset", attrs, 0);

    //Add instances
    for (S solution : solutionList) {
      //instances: one row per variable of the solution
      for (int i = 0; i <numberOfVariables ; i++) {
        double[] attValues = new double[dataset.numAttributes()];
        attValues[0] = ((DoubleSolution)solution).getVariable(i);
        attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING+i);
        attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString()+i);
        dataset.add(new DenseInstance(1.0, attValues));
      }
    }


    //DataSet test: built over the same attribute list as the training dataset
    Instances datasetTest = new Instances("my_dataset_test", attrs, 0);

    //Add instances
    for (int i = 0; i < numberOfVariables; i++) {
      Instance test = new DenseInstance(3);
      test.setValue(attr, ((DoubleSolution)testSolution).getVariable(i));
      test.setValue(attr1, VALUE_STRING+i);
      test.setValue(attr2, testSolution.toString()+i);
      datasetTest.add(test);
      //  dataset.add(test);
    }


    //split to 70:30 learn and test set

    //Preprocess strings (almost no classifier supports them)
    // NOTE(review): only the training dataset is filtered here; datasetTest is used below
    // in its unfiltered form - confirm that this is intended.
    StringToWordVector filter = new StringToWordVector();

    filter.setInputFormat(dataset);
    dataset = Filter.useFilter(dataset, filter);

    //Build classifier
    dataset.setClassIndex(1);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);
    //resample if needed
    //dataset = dataset.resample(new Random(42));
    dataset.setClassIndex(1);
    datasetTest.setClassIndex(1);
    //do eval
    Evaluation eval = new Evaluation(datasetTest); //trainset
    eval.evaluateModel(classifier, datasetTest); //testset
    result = classifier.classifyInstance(datasetTest.get(index));
  } catch (Exception e) {
    // best-effort fallback: any failure above yields the variable's own value; the cast is
    // repeated here, so a ClassCastException from it would propagate to the caller
    result = ((DoubleSolution)testSolution).getVariable(index);
  }
  return result;
}

Source File: ADTree.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Recursively searches this part of the tree for the best test (splitter) to add,
 * aiming to minimize the Z value. A Z-pure cutoff prunes the search: the subtree is
 * skipped when its Z-pure value is not below the smallest Z found so far.
 *
 * @param currentNode the root of the subtree to be searched, and the current node
 * being considered as parent of a new split
 * @param posInstances the positive-class instances that apply at this node
 * @param negInstances the negative-class instances that apply at this node
 * @exception Exception if search fails
 */
private void searchForBestTestSingle(PredictionNode currentNode,
    Instances posInstances, Instances negInstances) throws Exception {

  // pure or empty nodes are not investigated any further
  if (posInstances.numInstances() == 0 || negInstances.numInstances() == 0) {
    return;
  }

  // z-pure cutoff
  if (calcZpure(posInstances, negInstances) >= m_search_smallestZ) {
    return;
  }

  // bookkeeping
  m_nodesExpanded++;
  m_examplesCounted += posInstances.numInstances() + negInstances.numInstances();

  // static splitters (nominal attributes)
  for (int n = 0; n < m_nominalAttIndices.length; n++) {
    evaluateNominalSplitSingle(m_nominalAttIndices[n], currentNode,
        posInstances, negInstances);
  }

  // dynamic splitters (numeric attributes)
  if (m_numericAttIndices.length > 0) {

    // positives first, then all negatives appended, in one combined dataset
    Instances allInstances = new Instances(posInstances);
    for (int r = 0; r < negInstances.numInstances(); r++) {
      allInstances.add(negInstances.instance(r));
    }

    // find the optimal Z split-point per numeric attribute
    for (int n = 0; n < m_numericAttIndices.length; n++) {
      evaluateNumericSplitSingle(m_numericAttIndices[n], currentNode,
          posInstances, negInstances, allInstances);
    }
  }

  if (currentNode.getChildren().size() == 0) {
    return;
  }

  // continue the search along the configured path(s)
  switch (m_searchPath) {
    case SEARCHPATH_ALL:
      goDownAllPathsSingle(currentNode, posInstances, negInstances);
      break;
    case SEARCHPATH_HEAVIEST:
      goDownHeaviestPathSingle(currentNode, posInstances, negInstances);
      break;
    case SEARCHPATH_ZPURE:
      goDownZpurePathSingle(currentNode, posInstances, negInstances);
      break;
    case SEARCHPATH_RANDOM:
      goDownRandomPathSingle(currentNode, posInstances, negInstances);
      break;
  }
}

Source File: LHSSampler.java From bestconf with Apache License 2.0

4 votes

/**
 * Generates {@code sampleSetSize} sample points over a mix of numeric and nominal attributes.
 * Several candidate sample sets are produced with {@code generateOneSampleSet}; the one with
 * the largest minimum pairwise distance (per {@code minDistForSet}) decides which subdomain
 * (numeric) or pre-drawn value slot (nominal) each point uses per attribute.
 *
 * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of points to generate and of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){

	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//keep the first candidate as a fallback: without it the selection stays null
			//whenever no candidate's minimum distance exceeds 0, and the generation loop
			//below would fail with a NullPointerException
			setWithMaxMinDist = setPerm;
		}
	}

	//generate and output the set with the maximum minDist as the result

	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();

		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			//for nominal attributes each slot stores the position of one nominal value
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains

	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain (or value slot) chosen for point i in dimension j
			int cell = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				double lower = bounds[j][cell];
				double upper = bounds[j][cell+1];
				vals[j] = useMid?
						(lower+upper)/2:
							lower+((upper-lower)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][cell];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}

	//third, return the generated points
	return data;
}

Source File: ArffFileFromRun.java From NLIWOD with GNU Affero General Public License v3.0

4 votes

/**
 * Builds a multi-label training set for the QA systems HAWK, SINA, QAKIS and YODA and writes
 * its textual form to {@code ./src/main/resources/old/Train.arff}.
 * <p>
 * For every entry of the loaded run data the question is analyzed into a feature instance,
 * and one binary label per system is set to "1" when that system's f-measure for the
 * question is greater than 0, otherwise to "0".
 *
 * @param args not used
 * @throws Exception if loading the questions or run data, or analyzing a question, fails
 */
public static void main(String[] args) throws Exception {
	HAWK hawk = new HAWK();
	SINA sina = new SINA();
	QAKIS qakis = new QAKIS();
	YODA yoda = new YODA();

	/*
	 * For multilabel classification: one binary label attribute per system
	 */
	Attribute hawkatt = newBinaryAttribute("hawk");
	Attribute qakisatt = newBinaryAttribute("qakis");
	Attribute yodaatt = newBinaryAttribute("yoda");
	Attribute sinaatt = newBinaryAttribute("sina");

	// 1. Learn on the training data for each system a classifier to find
	// out which system can answer which question

	// 1.1 load the questions and how good each system answers
	log.debug("Load the questions and how good each system answers");
	List<IQuestion> trainQuestions = LoaderController.load(Dataset.QALD6_Train_Multilingual);
	List<ASystem> systems = Lists.newArrayList(hawk, sina, qakis, yoda);
	JSONArray traindata = RunProducer.loadRunData(Dataset.QALD6_Train_Multilingual);

	// 1.2 calculate the features per question and system
	log.debug("Calculate the features per question and system");
	Analyzer analyzer = new Analyzer();
	ArrayList<Attribute> fvfinal = analyzer.fvWekaAttributes;

	// each label is inserted at the front, so the resulting order is qakis, sina, yoda, hawk
	fvfinal.add(0, hawkatt);
	fvfinal.add(0, yodaatt);
	fvfinal.add(0, sinaatt);
	fvfinal.add(0, qakisatt);

	Instances trainingSet = new Instances("training_classifier: -C 4" , fvfinal, trainQuestions.size());
	log.debug("Start collection of training data for each system");

	for (int i = 0; i < traindata.size(); i++) {
		JSONObject questiondata = (JSONObject) traindata.get(i);
		JSONObject allsystemsdata = (JSONObject) questiondata.get("answers");
		String question = (String) questiondata.get("question");
		Instance tmp = analyzer.analyze(question);

		// every label starts at 0 and is raised to 1 below for systems with f-measure > 0
		tmp.setValue(hawkatt, 0);
		tmp.setValue(yodaatt, 0);
		tmp.setValue(sinaatt, 0);
		tmp.setValue(qakisatt, 0);

		for (ASystem system : systems) {
			JSONObject systemdata = (JSONObject) allsystemsdata.get(system.name());
			// Double.parseDouble replaces the deprecated Double(String) constructor
			if (Double.parseDouble(systemdata.get("fmeasure").toString()) > 0) {
				switch (system.name()) {
				case "hawk": tmp.setValue(hawkatt, 1); break;
				case "yoda": tmp.setValue(yodaatt, 1); break;
				case "sina": tmp.setValue(sinaatt, 1); break;
				case "qakis": tmp.setValue(qakisatt, 1); break;
				}
			}
		}

		trainingSet.add(tmp);
	}
	log.debug(trainingSet.toString());

	try (FileWriter file = new FileWriter("./src/main/resources/old/Train.arff")) {
		file.write(trainingSet.toString());
	} catch (IOException e) {
		// report through the logger already used by this method instead of printStackTrace
		log.error("Could not write ./src/main/resources/old/Train.arff", e);
	}
}

/** Creates a nominal attribute with the given name and the two values "1" and "0", in that order. */
private static Attribute newBinaryAttribute(String name) {
	ArrayList<String> values = new ArrayList<String>();
	values.add("1");
	values.add("0");
	return new Attribute(name, values);
}

Source File: PropositionalToMultiInstance.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Signify that this batch of input to the filter is finished.
 * The buffered propositional instances are sorted by their first attribute (the bag id),
 * consecutive instances sharing a bag id are grouped into one bag, and the resulting
 * bags are pushed to the output queue (in random order if randomization is enabled).
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() {

  Instances input = getInputFormat();
  if (input == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  input.sort(0);   // make sure that bagID is sorted
  Instances output = getOutputFormat();
  Instances bagInsts = output.attribute(1).relation();

  // scratch instance, refilled for every input row before it is added to the current bag
  Instance scratch = new DenseInstance(bagInsts.numAttributes());
  scratch.setDataset(bagInsts);

  double bagIndex   = input.instance(0).value(0);
  double classValue = input.instance(0).classValue();
  double bagWeight  = 0.0;

  // Convert pending input instances
  for (int row = 0; row < input.numInstances(); row++) {
    Instance current = input.instance(row);
    double currentBagIndex = current.value(0);

    // copy the propositional instance value, except the bagIndex and the class value
    final int payloadCount = input.numAttributes() - 2;
    for (int col = 0; col < payloadCount; col++) {
      scratch.setValue(col, current.value(col + 1));
    }
    scratch.setWeight(current.weight());

    if (currentBagIndex != bagIndex) {
      // bag id changed: close the finished bag and start a fresh one with this row
      addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

      bagInsts   = bagInsts.stringFreeStructure();
      bagInsts.add(scratch);
      bagIndex   = currentBagIndex;
      classValue = current.classValue();
      bagWeight  = scratch.weight();
    } else {
      bagInsts.add(scratch);
      bagWeight += scratch.weight();
    }
  }

  // reach the last instance, create and add the last bag
  addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

  if (getRandomize()) {
    output.randomize(new Random(getSeed()));
  }

  for (int b = 0; b < output.numInstances(); b++) {
    push(output.instance(b));
  }

  // Free memory
  flushInput();

  m_NewBatch = true;
  m_FirstBatchDone = true;

  return numPendingOutput() != 0;
}

Source File: SAX.java From tsml with GNU General Public License v3.0

1 votes

/**
 * Performs the SAX transformation on a single instance; the input format
 * must already be known.
 *
 * Generally to be used
 * in the SAX_1NN classifier (essentially a wrapper classifier that just feeds SAX-filtered
 * data to a 1NN classifier) to filter individual instances during testing
 *
 * The instance is wrapped in a one-element dataset built from {@code inputFormat}, because
 * Instance objects need the header info as well as the basic data. Neither
 * {@code alphabetSize} nor {@code numIntervals} is read by this method itself.
 *
 * @param inst the instance to transform
 * @param alphabetSize size of SAX alphabet
 * @param numIntervals size of resulting word
 * @return the first instance of the processed one-element dataset
 * @throws Exception if processing fails
 */
public Instance convertInstance(Instance inst, int alphabetSize, int numIntervals) throws Exception {
    Instances single = new Instances(inputFormat, 1);
    single.add(inst);

    Instances transformed = process(single);
    return transformed.firstInstance();
}

Java Code Examples for weka.core.Instances#add()