weka.core.Instances#numInstances

Source File: XMeans.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Assigns an instance to its nearest cluster center.
 * 
 * @param instance
 *          the instance to assign a cluster to.
 * @param centers
 *          the centers to cluster the instance to.
 * @return the index of the center with the smallest distance to the
 *         instance; 0 if <code>centers</code> is empty or no distance
 *         compares below positive infinity.
 */
protected int clusterProcessedInstance(Instance instance, Instances centers) {

  // Start from +infinity rather than Integer.MAX_VALUE, so that distances
  // of 2^31 - 1 or more can still be selected as the minimum.
  double minDist = Double.POSITIVE_INFINITY;
  int bestCluster = 0;
  for (int i = 0; i < centers.numInstances(); i++) {
    double dist = m_DistanceF.distance(instance, centers.instance(i));

    if (dist < minDist) {
      minDist = dist;
      bestCluster = i;
    }
  }
  return bestCluster;
}

Source File: CitationKNN.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Tests whether two exemplars hold the same instances.
 * The bags stored in the relational attribute at index 1 are compared
 * position by position and attribute value by attribute value.
 *
 * @param exemplar1 first exemplar
 * @param exemplar2 second exemplar
 * @return true if both bags have the same size and all compared values
 *         match, false otherwise
 */
public boolean equalExemplars(Instance exemplar1, Instance exemplar2){
  Instances bag1 = exemplar1.relationalValue(1);
  Instances bag2 = exemplar2.relationalValue(1);

  // bags of different size can never be equal
  if(bag1.numInstances() != bag2.numInstances()){
    return false;
  }

  for(int row = 0; row < bag1.numInstances(); row++){
    Instance left = bag1.instance(row);
    Instance right = bag2.instance(row);
    for(int col = 0; col < left.numAttributes(); col++){
      // plain double comparison of the internal values
      if(left.value(col) != right.value(col)){
        return false;
      }
    }
  }
  return true;
}

Source File: RuleStats.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Static utility function that filters out the data covered by the
 * rules after the given index in the given ruleset. It returns the
 * data that none of those successive rules covers.
 *
 * @param data the data to be processed
 * @param rules the ruleset
 * @param index the given index; only rules at positions greater than
 *              this index are consulted
 * @return a new dataset holding the instances that are not covered
 */
public static Instances rmCoveredBySuccessives(Instances data, FastVector rules, int index){
  Instances uncovered = new Instances(data, 0);

  nextDatum:
  for(int d = 0; d < data.numInstances(); d++){
    Instance datum = data.instance(d);

    for(int r = index + 1; r < rules.size(); r++){
      Rule successor = (Rule) rules.elementAt(r);
      if(successor.covers(datum)){
        // covered by a later rule: leave it out of the result
        continue nextDatum;
      }
    }

    uncovered.add(datum);
  }
  return uncovered;
}

Source File: C45PruneableClassifierTreeG.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Prepares the data structures needed for grafting (the limits array
 * and the per-instance weight index) and then starts the recursive
 * function traverseTree.
 *
 * @param data the data for the tree
 * @throws Exception if anything goes wrong
 */
public void doGrafting(Instances data) throws Exception {

  // one [lower limit, upper limit] pair per attribute, initially unbounded
  double[][] limits = new double[data.numAttributes()][2];
  for (double[] bounds : limits) {
    bounds[0] = Double.NEGATIVE_INFINITY;
    bounds[1] = Double.POSITIVE_INFINITY;
  }

  // use an index instead of creating new Instances objects all the time
  //   row 0 == weights at leaf
  //   row 1 == weights in atbop
  // every instance starts with weight 1 in both rows
  double[][] instanceIndex = new double[2][data.numInstances()];
  for (double[] weights : instanceIndex) {
    for (int x = 0; x < weights.length; x++) {
      weights[x] = 1;
    }
  }

  // first call to graft
  traverseTree(data, instanceIndex, limits, this, 0, -1);
}

Source File: InstanceTools.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Copies the attribute values of a dataset into a 2d double array with
 * one row per instance.
 *
 * @param ds the dataset to convert
 * @param removeLastVal if true, the last attribute of each instance is left out
 * @return an array of size [number of instances][number of copied attributes]
 */
public static double[][] fromWekaInstancesArray(Instances ds, boolean removeLastVal) {
    int cols = removeLastVal ? ds.numAttributes() - 1 : ds.numAttributes();
    int rows = ds.numInstances();

    double[][] data = new double[rows][cols];
    for (int r = 0; r < rows; r++) {
        double[] row = data[r];
        for (int c = 0; c < cols; c++) {
            row[c] = ds.get(r).value(c);
        }
    }

    return data;
}

Source File: SAXVSM.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Leave-one-out CV that performs the bop transformation once up front and
 * only re-applies tfxidf for every held-out instance.
 * 
 * @param data the data to cross-validate on
 * @return cv accuracy (correct classifications divided by the number of instances)
 * @throws Exception propagated from the calls made during cross-validation
 */
private double crossValidate(Instances data) throws Exception {
    transformedData = bop.process(data);

    int hits = 0;
    for (int fold = 0; fold < data.numInstances(); fold++) {
        // rebuild the corpus while ignoring BOP bag 'fold'
        corpus = tfxidf(transformedData, fold);

        double predicted = classifyInstance(data.get(fold));
        if (predicted == data.get(fold).classValue()) {
            hits++;
        }
    }

    return (double) hits / data.numInstances();
}

Source File: MultiInstanceToPropositional.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Sets the format of the input instances and derives the propositional
 * output format from it: the bag-index attribute (attribute 0), followed
 * by the attributes of the relational attribute at index 1, followed by
 * the class attribute.
 *
 * @param instanceInfo an Instances object containing the input 
 * instance structure; any bags it contains are only used to count the
 * bags and the instances inside them.
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set 
 * successfully
 */
public boolean setInputFormat(Instances instanceInfo) 
  throws Exception {

  if (instanceInfo.attribute(1).type() != Attribute.RELATIONAL) {
    throw new Exception("Can only handle relational-valued attribute!");
  }
  super.setInputFormat(instanceInfo);

  // count the bags and the total number of instances inside them
  m_NumBags = instanceInfo.numInstances();
  m_NumInstances = 0;
  for (int bag = 0; bag < m_NumBags; bag++) {
    Instances content = instanceInfo.instance(bag).relationalValue(1);
    m_NumInstances += content.numInstances();
  }

  Attribute classAttribute = (Attribute) instanceInfo.classAttribute().copy();
  Attribute bagIndex = (Attribute) instanceInfo.attribute(0).copy();

  /* create a new output format (propositional instance format) */
  Instances outputFormat = instanceInfo.attribute(1).relation().stringFreeStructure();
  outputFormat.insertAttributeAt(bagIndex, 0);
  outputFormat.insertAttributeAt(classAttribute, outputFormat.numAttributes());
  outputFormat.setClassIndex(outputFormat.numAttributes() - 1);

  super.setOutputFormat(outputFormat.stringFreeStructure());

  // locators work on fresh, unmodified copies of the bag structure
  Instances bagFormat = instanceInfo.attribute(1).relation();
  m_BagStringAtts = new StringLocator(bagFormat.stringFreeStructure());
  m_BagRelAtts    = new RelationalLocator(bagFormat.stringFreeStructure());

  return true;
}

Source File: MergeNominalValues.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Converts every instance of the given data to the output format,
 * replacing the values of the attributes flagged for modification with
 * the corresponding entries of the indicator table.
 * 
 * @param instances the data to process
 * @return the modified data
 * @throws Exception in case the processing goes wrong
 */
@Override
public Instances process(Instances instances) throws Exception {

  Instances result = new Instances(getOutputFormat(),
      instances.numInstances());

  for (int row = 0; row < instances.numInstances(); row++) {
    Instance source = instances.instance(row);
    double[] values = new double[instances.numAttributes()];

    for (int att = 0; att < instances.numAttributes(); att++) {
      // missing values and unflagged attributes are copied verbatim
      boolean remap = m_AttToBeModified[att] && !source.isMissing(att);
      if (remap) {
        values[att] = m_Indicators[att][(int) source.value(att)];
      } else {
        values[att] = source.value(att);
      }
    }

    DenseInstance converted = new DenseInstance(1.0, values);
    converted.setDataset(result);

    // copy possible strings, relational values...
    copyValues(converted, false, source.dataset(), getOutputFormat());

    // Add instance to output
    result.add(converted);
  }
  return result;
}

Source File: PowerCepstrum.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Replaces every non-class attribute value of the instances obtained
 * from the given dataset with its natural logarithm.
 *
 * @param out the dataset whose instances are transformed
 */
public void logDataSet(Instances out ){
    for(int row=0;row<out.numInstances();row++){
        Instance current=out.instance(row);
        for(int att=0;att<current.numAttributes();att++){
            if(att==current.classIndex())
                continue;   // leave the class value untouched
            double logged=Math.log(current.value(att));
            current.setValue(att,logged);
        }
    }
}

Source File: StatUtils.java From meka with GNU General Public License v3.0

5 votes

/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h', under random seed 'r'.
 * <br>
 * A randomized copy of D is split 60/40 into a train and a test portion; BR is evaluated on that split and the result is handed to LEAD2 together with the test portion.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC -- on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r)  throws Exception {
	Instances shuffled = new Instances(D);
	shuffled.randomize(r);

	// first 60% for training, the remainder for testing
	Instances trainSplit = new Instances(shuffled,0,shuffled.numInstances()*60/100);
	int trainSize = trainSplit.numInstances();
	Instances testSplit = new Instances(shuffled,trainSize,shuffled.numInstances()-trainSize);

	BR binaryRelevance = new BR();
	binaryRelevance.setClassifier(h);
	Result evaluation = Evaluation.evaluateModel((MultiLabelClassifier)binaryRelevance,trainSplit,testSplit,"PCut1","1");
	return LEAD2(testSplit,evaluation);
}

Source File: DD_DTW.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Demo entry point.
 * option 1: simple example of the classifier on a single dataset
 * option 2: recreate the results from the original published work
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args){

    int option = 1;

    try{
        if(option==1){
            String dataName = "ItalyPowerDemand";
            String pathPrefix = DATA_DIR+dataName+"/"+dataName;
            Instances train = DatasetLoading.loadDataNullable(pathPrefix+"_TRAIN");
            Instances test = DatasetLoading.loadDataNullable(pathPrefix+"_TEST");

            // create the classifier, using DTW as the distance function as an example
            DD_DTW nndw = new DD_DTW(DistanceType.DTW);

            // params a and b have not been explicitly set, so buildClassifier will cv to find them
            nndw.buildClassifier(train);

            // count the test instances whose predicted class matches the actual class
            int correct = 0;
            for(int i = 0; i < test.numInstances(); i++){
                double predicted = nndw.classifyInstance(test.instance(i));
                if(predicted==test.instance(i).classValue()){
                    correct++;
                }
            }

            double accuracy = (double)correct/test.numInstances()*100;
            DecimalFormat threeDecimals = new DecimalFormat("#.###");
            System.out.println(dataName+":\t"+threeDecimals.format(accuracy)+"%");

        }else if(option==2){
            recreateResultsTable();
        }
    }catch(Exception e){
        e.printStackTrace();
    }
}

Source File: OnlineCachedShapeletDistance.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Creates a fresh Stats object and stores a rescaled copy of every
 * series of the given dataset for further processing.
 *
 * @param dataInst the dataset whose series are rescaled and stored
 */
@Override
public void init(Instances dataInst)
{
    stats = new Stats();

    //Normalise all time series for further processing
    data = new double[dataInst.numInstances()][];
    for (int series = 0; series < data.length; series++)
    {
        double[] raw = dataInst.get(series).toDoubleArray();
        data[series] = seriesRescaler.rescaleSeries(raw, true);
    }
}

Source File: StatUtils.java From meka with GNU General Public License v3.0

5 votes

/**
 * GetP - Get a pairwise empirical joint-probability matrix P[][] from dataset D.
 * <br>
 * Only the diagonal (filled via p(D,j,1)) and the upper triangle
 * (filled via P(D,j,1,k,1)) are set; entries below the diagonal stay 0.
 * <br>
 * NOTE multi-label only
 *
 * @param D the dataset; its class index is used as the number of labels L
 * @return an L x L matrix
 */
public static double[][] getP(Instances D) {
	int L = D.classIndex();
	double[][] P = new double[L][L];
	for(int j = 0; j < L; j++) {
		// diagonal entry for label j
		P[j][j] = p(D,j,1);
		// pairwise entries for label j with every later label k
		for(int k = j+1; k < L; k++) {
			P[j][k] = P(D,j,1,k,1);
		}
	}
	return P;
}

Source File: CitationKNN.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the list of the k nearest neighbors of the given bag.
 * The bag itself (same object reference) is skipped, so the method can
 * be used for hold-one-out cross-validation.
 *
 * @param bag the bag to search for neighbors of
 * @param kNN the number of nearest neighbors
 * @param bags the data
 * @return a list of neighbors
 */
protected NeighborList findNeighbors(Instance bag, int kNN, Instances bags){
  if(kNN > bags.numInstances())
    kNN = bags.numInstances() - 1;

  NeighborList neighborList = new NeighborList(kNN);
  int seen = 0;   // number of candidate bags examined so far

  for(int i = 0; i < bags.numInstances(); i++){
    Instance candidate = bags.instance(i);
    if(bag == candidate)
      continue;   // for hold-one-out cross-validation

    double distance = distanceSet(bag, candidate);
    if(m_NeighborListDebug)
      System.out.println("distance(bag, " + i + "): " + distance);

    // the first kNN candidates are always inserted; later ones only if
    // they are not farther away than the current last list entry
    boolean keep = neighborList.isEmpty()
      || (seen < kNN)
      || (distance <= neighborList.mLast.mDistance);
    if(keep)
      neighborList.insertSorted(distance, candidate, i);
    seen++;
  }

  if(m_NeighborListDebug){
    System.out.println("bag neighbors:");
    neighborList.printReducedList();
  }

  return neighborList;
}

Source File: ClassifierSplitModel.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Splits the given set of instances into subsets.
 * An instance for which whichSubset returns an index greater than -1 goes
 * to that subset only; any other instance is added to every subset with
 * a weight greater than zero, its weight scaled by that subset's weight.
 *
 * @param data the instances to split
 * @return one Instances object per subset
 * @exception Exception if something goes wrong
 */
public final Instances [] split(Instances data) 
     throws Exception { 

  Instances [] subsets = new Instances [m_numSubsets];
  for (int s = 0; s < m_numSubsets; s++) {
    subsets[s] = new Instances(data, data.numInstances());
  }

  for (int i = 0; i < data.numInstances(); i++) {
    Instance instance = data.instance(i);
    double [] weights = weights(instance);
    int subset = whichSubset(instance);

    if (subset > -1) {
      subsets[subset].add(instance);
      continue;
    }

    // no single subset: distribute over all subsets with positive weight
    for (int s = 0; s < m_numSubsets; s++) {
      if (Utils.gr(weights[s],0)) {
        double newWeight = weights[s]*instance.weight();
        subsets[s].add(instance);
        subsets[s].lastInstance().setWeight(newWeight);
      }
    }
  }

  for (int s = 0; s < m_numSubsets; s++) {
    subsets[s].compactify();
  }

  return subsets;
}

Source File: PowerCepstrum.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the header of the output data: one numeric attribute per
 * retained term (half of the length reported by the FFT filter) and,
 * if the input has a class attribute, a nominal class attribute with
 * the same name and values appended at the end.
 *
 * @param inputFormat the format of the input data
 * @return the output format
 * @throws Exception if the format cannot be determined
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    //Set up instances size and format.
    int length = fftFilter.findLength(inputFormat) / 2;

    ArrayList<Attribute> atts = new ArrayList<>();
    for (int i = 0; i < length; i++) {
        atts.add(new Attribute("PowerSpectrum_" + i));
    }

    boolean hasClass = inputFormat.classIndex() >= 0;
    if (hasClass) {	//Classification set, set class
        //Copy the class values into a list
        Attribute target = inputFormat.attribute(inputFormat.classIndex());

        ArrayList<String> vals = new ArrayList<>(target.numValues());
        for (int v = 0; v < target.numValues(); v++) {
            vals.add(target.value(v));
        }
        atts.add(new Attribute(target.name(), vals));
    }

    Instances result = new Instances("Cepstrum" + inputFormat.relationName(), atts, inputFormat.numInstances());
    if (hasClass) {
        result.setClassIndex(result.numAttributes() - 1);
    }

    return result;
}

Source File: NSR.java From meka with GNU General Public License v3.0

4 votes

/**
 * Converts the multi-label dataset D into a dataset with a single nominal
 * class attribute "CLASS" (at index 0) whose values are the label
 * combinations that survive pruning with m_P.
 * <br>
 * An instance whose combination survived gets that combination as class
 * value. If it did not survive and m_N &gt; 0, copies of the instance are
 * added for the subsets returned by SuperLabelUtils.getTopNSubsets, each
 * with weight 1 / (number of subsets). Instances with a missing class are
 * removed at the end.
 * <br>
 * Side effect: m_InstancesTemplate is set to the header of the result.
 *
 * @param D the original dataset
 * @param L the number of label attributes (they are removed from the copy)
 * @return the converted dataset
 * @throws Exception if the conversion fails
 */
public Instances convertInstances(Instances D, int L) throws Exception {

	//Gather combinations
	HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D);
	if(getDebug())
		System.out.println("Found "+distinctCombinations.size()+" unique combinations");

	//Prune combinations
	MLUtils.pruneCountHashMap(distinctCombinations,m_P);
	if(getDebug())
		System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P);

	// Remove all class attributes
	Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L));
	// Add a new class attribute (typed list instead of a raw ArrayList)
	D_.insertAttributeAt(new Attribute("CLASS", new ArrayList<String>(distinctCombinations.keySet())),0); // create the class attribute
	D_.setClassIndex(0);

	//Add class values
	for (int i = 0; i < D.numInstances(); i++) {
		String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L));
		// add it
		if(distinctCombinations.containsKey(y)) 	//if its class value exists
			D_.instance(i).setClassValue(y);
		// decomp
		else if(m_N > 0) { 
			String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N);
			for (String s : d_subsets) {
				// copies are appended after the original instances, so the
				// indices 0..D.numInstances()-1 used by this loop stay valid
				Instance copy = (Instance)(D_.instance(i)).copy();
				copy.setClassValue(s);
				copy.setWeight(1.0 / d_subsets.length);
				D_.add(copy);
			}
		}
	}

	// remove with missing class
	D_.deleteWithMissingClass();

	// keep the header of new dataset for classification
	m_InstancesTemplate = new Instances(D_, 0);

	if (getDebug())
		System.out.println(""+D_);

	return D_;
}

Source File: MIWrapper.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Computes the class distribution for a given exemplar (bag).
 * The bag is converted to single-instance format, the base classifier is
 * asked for the distribution of every contained instance, and these are
 * combined according to m_Method (arithmetic average, geometric average
 * or maximum probability).
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp) 
  throws Exception {	

  // wrap the exemplar in a dataset of its own
  Instances testData = new Instances (exmp.dataset(),0);
  testData.add(exmp);

  // convert the test bag into a single-instance dataset
  m_ConvertToProp.setWeightMethod(
      new SelectedTag(
        MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, 
        MultiInstanceToPropositional.TAGS_WEIGHTMETHOD));
  testData = Filter.useFilter(testData, m_ConvertToProp);
  testData.deleteAttributeAt(0); //remove the bag index attribute

  double nI = (double)testData.numInstances();
  double [] distribution = new double[m_NumClasses];
  double [] maxPr = new double [m_NumClasses];

  for(int i=0; i<nI; i++){
    double[] dist = m_Classifier.distributionForInstance(testData.instance(i));
    for(int j=0; j<m_NumClasses; j++){

      if(m_Method == TESTMETHOD_ARITHMETIC){
        distribution[j] += dist[j]/nI;
      }
      else if(m_Method == TESTMETHOD_GEOMETRIC){
        // Avoid 0/1 probability
        if(dist[j]<0.001)
          dist[j] = 0.001;
        else if(dist[j]>0.999)
          dist[j] = 0.999;

        // accumulate the mean log-probability; exponentiated below
        distribution[j] += Math.log(dist[j])/nI;
      }
      else if(m_Method == TESTMETHOD_MAXPROB){
        if (dist[j]>maxPr[j]) 
          maxPr[j] = dist[j];
      }
    }
  }

  if(m_Method == TESTMETHOD_GEOMETRIC){
    for(int j=0; j<m_NumClasses; j++)
      distribution[j] = Math.exp(distribution[j]);
  }

  if(m_Method == TESTMETHOD_MAXPROB){   // for positive bag
    distribution[1] = maxPr[1];
    distribution[0] = 1 - distribution[1];
  }

  // fall back to a uniform distribution if everything summed to zero
  if (Utils.eq(Utils.sum(distribution), 0)) {
    for (int i = 0; i < distribution.length; i++)
      distribution[i] = 1.0 / (double) distribution.length;
  }
  else {
    Utils.normalize(distribution);
  }

  return distribution;
}

Source File: BoxTidwell.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Iteratively estimates a power alpha for the attribute at index pos and
 * replaces that attribute's values in data with value^alpha.
 * <br>
 * Each iteration fits two linear models: one with the regressor x^alpha and
 * one with the additional regressor x^alpha*log(x). alpha is then updated by
 * b2/b1, where b1 is the coefficient of x^alpha in the first model and b2
 * the coefficient of x^alpha*log(x) in the second. The loop stops when
 * |b2/b1| is below 0.1, after 10 iterations, or when |alpha| exceeds 10
 * (in which case alpha is reset to 1).
 * <br>
 * NOTE(review): only attributes 0..numAttributes-2 are copied in as
 * regressors, which presumably assumes the class attribute is the last
 * attribute - confirm against callers.
 *
 * @param data the data; the attribute at pos is overwritten
 * @param pos index of the attribute to transform
 * @param resultPos index in powers at which the final alpha is stored
 * @param powers output array that receives the final alpha
 * @return the same data object that was passed in
 */
public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers)
	{

//1. Get values of the attribute of interest. 
		
//Confusingly, am working with attributes in rows not columns		
		double[] temp=data.attributeToDoubleArray(pos);
		double[] originalData= new double[temp.length];
		double[] logData= new double[temp.length];
		
		// keep the untransformed values and their logs for use in every iteration
		for(int i=0;i<temp.length;i++)
		{
			originalData[i]=temp[i];
			logData[i]=Math.log(temp[i]);	
		}
		double[] y =data.attributeToDoubleArray(data.classIndex()); 
//		I'm not sure if this is a memory copy or a reference copy, so be safe
		double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()];
		double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()];
		// row 0 of both matrices is the column of ones
		for(int j=0;j<data.numInstances();j++)
		{
			transposeFirst[0][j]=transposeSecond[0][j]=1;
		}
		// rows 1..numAttributes-1 hold attributes 0..numAttributes-2;
		// both matrices are assigned the SAME array object for each of these rows
		for(int i=1;i<data.numAttributes();i++)
		{
			transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1);
		}
//		Add one to pos cos of the ones
		pos=pos+1;
//		Second has an attribute at the end of data for transform
		int workingPos=data.numAttributes();
		LinearModel l1,l2;
		double alpha=1, b1,b2;
		double min=0.1;
		boolean finished=false;
		int count=0;
		final int MaxIterations=10;
		//		Initialise alpha to 1
//Find Base SSE		
		//While not termination condition
		while(!finished)
		{
//			System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha);
			//Create new attributes
			//1. Calculate x^alpha
			for(int j=0;j<originalData.length;j++)
			{
				transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha);
			}

			//2. Fit y=b1+ .. b_pos	x^alpha (+ other terms)-> get b_pos
			l1=new LinearModel(transposeFirst,y);	
			l1.fitModel();
			
//Not necessary: 
//			l1.formTrainPredictions();
//			l1.findTrainStatistics();
//			System.out.println(l1+"\nVariance for L1 = "+l1.variance);
			
			b1=l1.paras[pos];
			//3. Fit y=b*1+ .. b*_pos	x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2
			//2. Calculate x^alpha*log(x)
			for(int j=0;j<originalData.length;j++)
				transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j];
			l2=new LinearModel(transposeSecond,y);	
			l2.fitModel();
			
//			Not necessary: 
//			l2.formTrainPredictions();
//			l2.findTrainStatistics();
//			System.out.println(l2+"\nVariance for L2 = "+l2.variance);
			
			b2=l2.paras[workingPos];
			
			// update step: alpha = alpha + b2/b1
			alpha+=b2/b1;
			//Work out change term alpha = b*2/b1+alpha0
//			System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2);
			//Update termination criteria: stop if small change: check notes
			count++;
			if(Math.abs(b2/b1)<min || count>=MaxIterations)
				finished=true;
			// otherwise give up once alpha leaves [-10, 10] and fall back to alpha = 1
			else if(Math.abs(alpha)>10)
			{
				alpha=1;
				finished=true;
			}
		}
//Fix original 
		powers[resultPos]=alpha;
		// undo the +1 offset so pos indexes the attribute in data again
		pos=pos-1;
		Instance inst;
		for(int i=0;i<data.numInstances();i++)
		{
			inst=data.instance(i);
			inst.setValue(pos,Math.pow(originalData[i],alpha));
		}
		return data;
	}

Source File: LexiconDistantSupervision.java From AffectiveTweets with GNU General Public License v3.0

4 votes

/**
 * Labels tweets by distant supervision with a polarity lexicon.
 * A tweet is kept only if its tokens match lexicon words of exactly one
 * polarity; it is then labelled 1 (positive words only) or 0 (negative
 * words only) in the last attribute of the output format. If
 * removeMatchingWord is set, the matching words are removed from the
 * content first.
 *
 * @param instances the data to process
 * @return the output format filled with the labelled tweets
 * @throws Exception if the processing goes wrong
 */
@Override
protected Instances process(Instances instances) throws Exception {

	// set upper value for text index
	m_textIndex.setUpper(instances.numAttributes() - 1);

	Instances result = getOutputFormat();

	// reference to the content of the message, users index start from zero
	Attribute attrCont = instances.attribute(this.m_textIndex.getIndex());

	for (int i = 0; i < instances.numInstances(); i++) {

		String content = instances.instance(i).stringValue(attrCont);

		ArrayList<String> posWords = new ArrayList<String>();
		ArrayList<String> negWords = new ArrayList<String>();

		// collect the lexicon words of each polarity found in the tweet
		this.m_tokenizer.tokenize(content);
		while (this.m_tokenizer.hasMoreElements()) {
			String word = this.m_tokenizer.nextElement();
			if (!this.lex.getNomDict().containsKey(word))
				continue;

			String value = this.lex.getNomDict().get(word).get(this.polarityAttName);
			if (value.equals(this.polarityAttPosValName))
				posWords.add(word);
			else if (value.equals(this.polarityAttNegValName))
				negWords.add(word);
		}

		boolean onlyPositive = posWords.size() > 0 && negWords.size() == 0;
		boolean onlyNegative = negWords.size() > 0 && posWords.size() == 0;

		// tweets with no match or with mixed polarity are dropped
		if (!onlyPositive && !onlyNegative)
			continue;

		// the matching words are removed from the content if flag is set
		if (this.removeMatchingWord) {
			if (onlyPositive)
				content = content.replaceAll(patternFromList(posWords), "");
			else
				content = content.replaceAll(patternFromList(negWords), "");
		}

		double[] values = new double[result.numAttributes()];

		// copy other attributes
		for (int n = 0; n < instances.numAttributes(); n++) {
			if (n != this.m_textIndex.getIndex())
				values[n] = instances.instance(i).value(n);
		}

		// add the content
		values[this.m_textIndex.getIndex()] = attrCont.addStringValue(content);

		// label tweet according to the word's polarity
		if (onlyPositive)
			values[result.numAttributes() - 1] = 1;
		else
			values[result.numAttributes() - 1] = 0;

		Instance inst = new SparseInstance(1, values);
		inst.setDataset(result);

		// copy possible strings, relational values...
		copyValues(inst, false, instances, result);

		result.add(inst);
	}

	return result;
}

Java Code Examples for weka.core.Instances#numInstances()