weka.core.Instances#numAttributes

Source File: HillClimber.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Scans every (tail, head) attribute pair for the highest-scoring arc addition.
 * <p>
 * A head attribute is skipped when its parent set already holds
 * {@code m_nMaxNrOfParents} parents. A candidate arc replaces the current best
 * only if {@code addArcMakesSense} accepts it, its score is strictly greater
 * than the current best score, and {@code isNotTabu} accepts it.
 *
 * @param bayesNet Bayes network to add arc to
 * @param instances data set; its attribute count bounds the search
 * @param oBestOperation best operation found so far; its {@code m_fScore} is the score to beat
 * @return the best arc addition found, or the {@code oBestOperation} that was passed in
 * if no candidate scored higher
 * @throws Exception if something goes wrong
 */
Operation findBestArcToAdd(BayesNet bayesNet, Instances instances, Operation oBestOperation) throws Exception {
	final int attributeCount = instances.numAttributes();
	for (int head = 0; head < attributeCount; head++) {
		// a head whose parent set is already full cannot take another arc
		if (bayesNet.getParentSet(head).getNrOfParents() >= m_nMaxNrOfParents) {
			continue;
		}
		for (int tail = 0; tail < attributeCount; tail++) {
			if (!addArcMakesSense(bayesNet, instances, head, tail)) {
				continue;
			}
			Operation candidate = new Operation(tail, head, Operation.OPERATION_ADD);
			double candidateScore = calcScoreWithExtraParent(candidate.m_nHead, candidate.m_nTail);
			// the tabu check is only consulted for candidates that beat the current best
			if (candidateScore > oBestOperation.m_fScore && isNotTabu(candidate)) {
				candidate.m_fScore = candidateScore;
				oBestOperation = candidate;
			}
		}
	}
	return oBestOperation;
}

Source File: LibLINEAR.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Applies a NominalToBinary filter when at least one non-class attribute
 * is not numeric; otherwise returns the data unchanged.
 * <p>
 * The filter is created and stored in {@code m_NominalToBinary} only when it
 * is needed; the field is left untouched for all-numeric data.
 *
 * @param insts the data to inspect and possibly filter
 * @return the filtered data, or {@code insts} itself if no filtering was required
 * @throws Exception if setting up or applying the filter fails
 */
private Instances nominalToBinary( Instances insts ) throws Exception {
  boolean foundNonNumeric = false;
  // stop scanning as soon as the first non-numeric, non-class attribute shows up
  for (int att = 0; !foundNonNumeric && att < insts.numAttributes(); att++) {
    foundNonNumeric = att != insts.classIndex() && !insts.attribute(att).isNumeric();
  }

  if (!foundNonNumeric) {
    return insts;
  }

  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(insts);
  return Filter.useFilter(insts, m_NominalToBinary);
}

Source File: InstanceTools.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Copies the attribute values of two data sets into a single dense matrix.
 * <p>
 * Rows {@code 0 .. train.numInstances()-1} hold the train instances, followed by
 * the test instances. The column count is taken from {@code train}; the test rows
 * are filled using {@code test.numAttributes()} columns.
 *
 * @param train data set written to the leading rows
 * @param test data set written to the trailing rows
 * @return matrix with one row per instance and one column per train attribute
 */
public static double[][] create2DMatrixFromInstances(Instances train, Instances test) {
    final int trainRows = train.numInstances();
    final int testRows = test.numInstances();
    double[][] matrix = new double[trainRows + testRows][train.numAttributes()];

    for (int row = 0; row < trainRows; row++) {
        for (int col = 0; col < train.numAttributes(); col++) {
            matrix[row][col] = train.get(row).value(col);
        }
    }

    // test rows are appended directly after the train rows
    for (int offset = 0; offset < testRows; offset++) {
        final int row = trainRows + offset;
        for (int col = 0; col < test.numAttributes(); col++) {
            matrix[row][col] = test.get(offset).value(col);
        }
    }

    return matrix;
}

Source File: TweetToWordListCountFeatureVector.java From AffectiveTweets with GNU General Public License v3.0

6 votes

/**
 * Builds the output header: every input attribute in its original order,
 * followed by one extra attribute named "wordListCount".
 *
 * @param inputFormat header of the incoming data
 * @return an empty data set with the extended attribute list, the input's
 * relation name and the input's class index
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	final int inputAttCount = inputFormat.numAttributes();
	ArrayList<Attribute> outputAtts = new ArrayList<Attribute>(inputAttCount + 1);

	// carry over the existing attributes unchanged
	int pos = 0;
	while (pos < inputAttCount) {
		outputAtts.add(inputFormat.attribute(pos));
		pos++;
	}

	// the new count attribute goes last
	outputAtts.add(new Attribute("wordListCount"));

	Instances outputHeader = new Instances(inputFormat.relationName(), outputAtts, 0);
	outputHeader.setClassIndex(inputFormat.classIndex());
	return outputHeader;
}

Source File: InstanceTools.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Deletes, from both data sets, every attribute whose value is identical across
 * all consecutive instances of {@code train}.
 * <p>
 * The last attribute of {@code train} is never examined. Constancy is judged on
 * {@code train} only; the attribute at the same position is then removed from
 * {@code test} as well. Both arguments are modified in place.
 *
 * @param train data set used to detect constant attributes
 * @param test data set that has the same attribute positions removed
 */
public static void removeConstantTrainAttributes(Instances train, Instances test){
    int att = 0;
    // numAttributes() shrinks as attributes are deleted, so re-read it every pass
    while (att < train.numAttributes() - 1) {
        // advance while each instance equals its predecessor on this attribute
        int row = 1;
        while (row < train.numInstances()
                && train.instance(row - 1).value(att) == train.instance(row).value(att)) {
            row++;
        }

        boolean reachedEnd = (row == train.numInstances());
        if (!reachedEnd) {
            att++;
            continue;
        }

        // constant: drop it from both sets and re-test the attribute that slides into this slot
        train.deleteAttributeAt(att);
        test.deleteAttributeAt(att);
    }
}

Source File: TunedXGBoost.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Trains the model on the given data and fills {@code trainResults} with
 * build metadata (build time, time unit, classifier name, dataset name, parameters).
 * <p>
 * Resets {@code booster} and {@code trainResults}, stores a copy of the data in
 * {@code trainInsts}, records the instance/attribute/class counts, clamps
 * {@code cvFolds} to the number of training instances and then delegates the
 * actual training to {@code buildActualClassifer()}.
 *
 * @param insts the training data; copied, not retained directly
 * @throws Exception declared by the signature; presumably propagated from the
 * training / estimation helpers (not visible here)
 */
@Override
    public void buildClassifier(Instances insts) throws Exception {
        // wall-clock start, used for the build time unless saveEachParaAcc is set
        long startTime=System.nanoTime(); 

        // discard any state left over from a previous build
        booster = null;
        trainResults =new ClassifierResults();

        trainInsts = new Instances(insts);
        numTrainInsts = insts.numInstances();
        numAtts = insts.numAttributes();
        numClasses = insts.numClasses();

        // never use more folds than there are training instances
        if(cvFolds>numTrainInsts)
            cvFolds=numTrainInsts;
//        rng = new Random(seed); //for tie resolution etc if needed

        buildActualClassifer();

        if(getEstimateOwnPerformance()&& !tuneParameters) //if tuneparas, will take the cv results of the best para set
            trainResults = estimateTrainAcc(trainInsts);

        // NOTE(review): combinedBuildTime is maintained outside this method — confirm where it is accumulated
        if(saveEachParaAcc)
            trainResults.setBuildTime(combinedBuildTime);
        else
            trainResults.setBuildTime(System.nanoTime()-startTime);
//            trainResults.buildTime=System.nanoTime()-startTime;

        trainResults.setTimeUnit(TimeUnit.NANOSECONDS);
        trainResults.setClassifierName(tuneParameters ? "TunedXGBoost" : "XGBoost");
        trainResults.setDatasetName(trainInsts.relationName());
        trainResults.setParas(getParameters());
    }

Source File: EnsembleEvaluatorTest.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Downloads the segment dataset, builds a ReliefF attribute evaluator on a
 * stratified split of it and asserts that the mean attribute score is positive.
 *
 * @throws Exception if the download, parsing or evaluation fails
 */
@Test
public void ensembleEvaluatorTest() throws Exception {
    logger.info("Starting cluster evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);
    Instances data;
    // close the file handle once the ARFF content has been read into memory
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);
    List<Instances> split = WekaUtil.getStratifiedSplit(data, 42, .05f);

    Instances insts = split.get(0);

    long timeStart = System.currentTimeMillis();

    ReliefFAttributeEval eval = new ReliefFAttributeEval();
    eval.buildEvaluator(insts);

    long timeStartEval = System.currentTimeMillis();

    // average the evaluator's score over every attribute index
    double attEvalSum = 0;
    for (int i = 0; i < insts.numAttributes(); i++) {
        attEvalSum += eval.evaluateAttribute(i);
    }
    attEvalSum /= insts.numAttributes();

    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.info("Value: " + attEvalSum);
    Assert.assertTrue(attEvalSum > 0);
    logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
    logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}

Source File: PAA.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the output header for PAA: {@code numIntervals} numeric attributes named
 * "PAAInterval_0", "PAAInterval_1", ... followed, when the input has a class
 * attribute, by a nominal copy of that class attribute (same name, same values).
 *
 * @param inputFormat header of the incoming data
 * @return data set named "PAA" + input relation name, with the class index set
 * to the last attribute when the input has a class
 * @throws Exception if any non-class input attribute is not numeric
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
        throws Exception {

    final int classIdx = inputFormat.classIndex();
    final boolean hasClass = classIdx >= 0;

    // every attribute other than the class must be numeric
    for (int att = 0; att < inputFormat.numAttributes(); att++) {
        if (att != classIdx && !inputFormat.attribute(att).isNumeric()) {
            throw new Exception("Non numeric attribute not allowed for PAA");
        }
    }

    ArrayList<Attribute> outputAtts = new ArrayList<>();
    for (int interval = 0; interval < numIntervals; interval++) {
        outputAtts.add(new Attribute("PAAInterval_" + interval));
    }

    if (hasClass) {
        // rebuild the class attribute from its name and its list of values
        Attribute classAtt = inputFormat.attribute(classIdx);
        ArrayList<String> labels = new ArrayList<>(classAtt.numValues());
        for (int v = 0; v < classAtt.numValues(); v++) {
            labels.add(classAtt.value(v));
        }
        outputAtts.add(new Attribute(classAtt.name(), labels));
    }

    Instances outputHeader =
            new Instances("PAA" + inputFormat.relationName(), outputAtts, inputFormat.numInstances());
    if (hasClass) {
        outputHeader.setClassIndex(outputHeader.numAttributes() - 1);
    }
    return outputHeader;
}

Source File: PartitionedMultiFilter.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Determines the indices of unused attributes (ones that are not covered
  * by any of the ranges) and stores them in {@code m_IndicesUnused}.
  * The class attribute is never reported as unused.
  *
  * @param data	the data to base the determination on
  * @see 		#m_IndicesUnused
  */
 protected void determineUnusedIndices(Instances data) {
   Vector<Integer>	indices;
   int			i;
   int			n;
   boolean		covered;

   // traverse all attributes and test each against every range
   indices = new Vector<Integer>();
   for (i = 0; i < data.numAttributes(); i++) {
     if (i == data.classIndex())
       continue;

     covered = false;
     for (n = 0; n < getRanges().length; n++) {
       if (getRanges()[n].isInRange(i)) {
         covered = true;
         break;
       }
     }

     // Integer.valueOf replaces the deprecated Integer(int) constructor
     if (!covered)
       indices.add(Integer.valueOf(i));
   }

   // create array
   m_IndicesUnused = new int[indices.size()];
   for (i = 0; i < indices.size(); i++)
     m_IndicesUnused[i] = indices.get(i).intValue();

   if (getDebug())
     System.out.println(
       "Unused indices: " + Utils.arrayToString(m_IndicesUnused));
 }

Source File: FilteredSubsetEval.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Initializes a filtered attribute evaluator.
 * <p>
 * The data is passed through {@code m_filter}; the filtered result must keep the
 * number, order (by name) and class index of the original attributes. The
 * wrapped evaluator is then built on the filtered data, and a string-free copy
 * of its structure is stored in {@code m_filteredInstances}.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the data cannot be handled, the filter changes the
 * attribute layout, or the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {
  // can evaluator handle data?
  getCapabilities().testWithFail(data);

  // header-only snapshot of the unfiltered data, used for the layout checks below
  Instances header = new Instances(data, 0);

  m_filter.setInputFormat(data);
  Instances filtered = Filter.useFilter(data, m_filter);

  final int expectedCount = header.numAttributes();
  if (filtered.numAttributes() != expectedCount) {
    throw new Exception("Filter must not alter the number of "
                        +"attributes in the data!");
  }

  // the class index is only compared when the original had one
  final int expectedClass = header.classIndex();
  if (expectedClass >= 0 && filtered.classIndex() != expectedClass) {
    throw new Exception("Filter must not change the class attribute!");
  }

  // attribute names must line up position by position
  for (int pos = 0; pos < expectedCount; pos++) {
    String nameAfter = filtered.attribute(pos).name();
    String nameBefore = header.attribute(pos).name();
    if (!nameAfter.equals(nameBefore)) {
      throw new Exception("Filter must not alter the order of the attributes!");
    }
  }

  // can the evaluator handle this data?
  ((ASEvaluation)getSubsetEvaluator()).getCapabilities().testWithFail(filtered);
  m_filteredInstances = filtered.stringFreeStructure();

  ((ASEvaluation)m_evaluator).buildEvaluator(filtered);
}

Source File: ModelFactory.java From AIDR with GNU Affero General Public License v3.0

5 votes

/**
 * Creates an empty data set named "spec" that reuses the attributes of
 * {@code dataSet} and uses its last attribute as the class.
 *
 * @param dataSet data set whose attributes are reused
 * @return empty data set with the same attributes and the class index on the last one
 */
private static Instances getTemplateSet(Instances dataSet) {
	final int attributeCount = dataSet.numAttributes();
	ArrayList<Attribute> templateAtts = new ArrayList<Attribute>(attributeCount);
	int pos = 0;
	while (pos < attributeCount) {
		templateAtts.add(dataSet.attribute(pos));
		pos++;
	}

	Instances template = new Instances("spec", templateAtts, 0);
	template.setClassIndex(template.numAttributes() - 1);
	return template;
}

Source File: RotationForest.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Normalizes m_MinGroup and m_MaxGroup: swaps them when they are in the wrong
 * order, then caps both at {@code data.numAttributes() - 1}.
 *
 * @param data the dataset whose attribute count bounds the group sizes
 */
protected void checkMinMax(Instances data) {
  // make sure min <= max before clamping
  if( m_MaxGroup < m_MinGroup ) {
    int swap = m_MinGroup;
    m_MinGroup = m_MaxGroup;
    m_MaxGroup = swap;
  }

  final int largestAllowed = data.numAttributes() - 1;
  m_MaxGroup = Math.min(m_MaxGroup, largestAllowed);
  m_MinGroup = Math.min(m_MinGroup, largestAllowed);
}

Source File: Tools.java From gsn with GNU General Public License v3.0

5 votes

/**
 * Appends a numeric attribute named "interpolate" to the dataset and fills it
 * with the classifier's prediction for each instance.
 * <p>
 * All predictions are computed before the attribute is inserted, so the
 * classifier sees the instances in their original layout. The dataset is
 * modified in place.
 *
 * @param c the classifier
 * @param i the dataset
 * @throws Exception if classifying an instance fails
 */
public static void add_predictions(Classifier c, Instances i) throws Exception{

	final int rowCount = i.numInstances();
	double[] predictions = new double[rowCount];
	for (int row = 0; row < rowCount; row++) {
		predictions[row] = c.classifyInstance(i.instance(row));
	}

	// the new attribute takes the position just past the current last attribute
	final int newColumn = i.numAttributes();
	i.insertAttributeAt(new Attribute("interpolate"), newColumn);

	for (int row = 0; row < rowCount; row++) {
		i.instance(row).setValue(newColumn, predictions[row]);
	}
}

Source File: sIB.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Transposes the document-term data into a term-document matrix.
  * Only the values each instance reports through its sparse accessors are
  * written; every other cell keeps the array default of 0.
  *
  * @param data instances with document-term info
  * @return a term-document matrix transposed from the input dataset
  */
 private Matrix getTransposedMatrix(Instances data) {
   final int termCount = data.numAttributes();
   final int docCount = data.numInstances();
   double[][] termByDoc = new double[termCount][docCount];

   for (int doc = 0; doc < docCount; doc++) {
     Instance row = data.instance(doc);
     // index(slot) maps a stored slot to its attribute position
     for (int slot = 0; slot < row.numValues(); slot++) {
       int term = row.index(slot);
       termByDoc[term][doc] = row.valueSparse(slot);
     }
   }

   return new Matrix(termByDoc);
 }

Source File: LinearModel.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the response vector Y and the design matrix X (plus its transpose Xt)
 * from a data set.
 * <p>
 * Row 0 of Xt is the constant term (all ones); rows 1 .. m-1 are the values of
 * attributes 0 .. m-2. The response is the class attribute.
 * NOTE(review): using attributes 0 .. m-2 as regressors looks like it assumes the
 * class attribute is the last one — confirm against callers.
 *
 * @param data the data set to form X and Y from
 */
public LinearModel(Instances data)
	{
		// n observations, m columns (the constant term takes the place of the class column)
		n=data.numInstances();
		m=data.numAttributes();

		y = data.attributeToDoubleArray(data.classIndex());
		Y=new Matrix(y,y.length);

		double[][] designRows = new double[m][n];
		int obs = 0;
		while (obs < n) {
			designRows[0][obs] = 1;
			obs++;
		}
		for (int row = 1; row < m; row++) {
			designRows[row] = data.attributeToDoubleArray(row - 1);
		}

		Xt=new Matrix(designRows);
		X=Xt.transpose();
	}

Source File: ARAMNetwork.java From meka with GNU General Public License v3.0

4 votes

/**
  * Generates the classifier.
  * <p>
  * On the first call ({@code numFeatures == -1}) the network is initialised via
  * {@code initARAM}. On later calls the method returns without training when the
  * feature or class length derived from {@code D} differs from the stored
  * {@code numFeatures} / {@code numClasses}. Instances are presented to
  * {@code updateClassifier} in the order given by {@code order}, which defaults
  * to 0 .. numInstances-1 when it has not been set.
  *
  * @param D set of instances serving as training data
  * @exception Exception if the classifier has not been generated 
  * successfully
  */
 public void buildClassifier(Instances D) throws Exception {

	// NOTE(review): presumably the MEKA convention where classIndex() equals the
	// number of labels, and the factor 2 is complement coding — confirm.
	int L = D.classIndex();
	int featlength = (D.numAttributes() - L) * 2;
	int classlength = L * 2;

	if (this.order == null) {
		order = new ArrayList<Integer>();
		for (int j = 0; j < D.numInstances(); j++) {
			order.add(j);
		}
	}

	if (numFeatures == -1) {
		initARAM(featlength, classlength, roa, threshold);
	} else {
		// dimensions differ from the initialised network: skip training silently
		if (featlength != numFeatures) {
			return;
		}
		if (classlength != numClasses) {
			return;
		}
	}

	// the actual learning happens one instance at a time in updateClassifier
	for (int i = 0; i < D.numInstances(); i++) {
		Instance instance = D.get(order.get(i));
		updateClassifier(instance);
	}
	System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+".");
 }

Source File: ArffLexiconEvaluator.java From AffectiveTweets with GNU General Public License v3.0

4 votes

/**
 * Loads the ARFF lexicon file and builds the word dictionaries.
 * <p>
 * Every numeric attribute (other than the word attribute) contributes one
 * feature name "lexiconName-attribute"; every nominal attribute contributes
 * one feature name per value, "lexiconName-attribute-value". Each word is
 * stemmed and mapped to its non-missing numeric scores in {@code numDict} and
 * to its non-missing nominal values in {@code nomDict}.
 *
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
	Instances lexInstances;
	// try-with-resources releases the file handle even when parsing fails
	try (BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile))) {
		lexInstances = new Instances(reader);
	}

	// set upper value for word index
	lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);
	final int wordIndex = this.lexiconWordIndex.getIndex();

	List<Attribute> numericAttributes=new ArrayList<Attribute>();
	List<Attribute> nominalAttributes=new ArrayList<Attribute>();

	// checks all numeric and nominal attributes and discards the word attribute
	for (int i = 0; i < lexInstances.numAttributes(); i++) {
		if (i == wordIndex) {
			continue;
		}
		Attribute att = lexInstances.attribute(i);
		if (att.isNumeric()) {
			numericAttributes.add(att);
			// adds the attribute name to the message-level features to be calculated
			this.featureNames.add(this.lexiconName+"-"+att.name());
		} else if (att.isNominal()) {
			nominalAttributes.add(att);
			// adds the attribute name together with the nominal value to the message-level features to be calculated
			int numValues = att.numValues();
			for (int j = 0; j < numValues; j++) {
				this.featureNames.add(this.lexiconName+"-"+att.name()+"-"+att.value(j));
			}
		}
	}

	// Maps all words with their affective scores discarding missing values
	for (Instance inst : lexInstances) {
		if (!inst.attribute(wordIndex).isString()) {
			continue;
		}

		// stems the word
		String word = this.m_stemmer.stem(inst.stringValue(wordIndex));

		// map numeric scores
		if (!numericAttributes.isEmpty()) {
			Map<String,Double> wordVals = new HashMap<String,Double>();
			for (Attribute na : numericAttributes) {
				if (!weka.core.Utils.isMissingValue(inst.value(na))) {
					wordVals.put(na.name(), inst.value(na));
				}
			}
			this.numDict.put(word, wordVals);
		}

		// map nominal associations
		if (!nominalAttributes.isEmpty()) {
			Map<String,String> wordCounts = new HashMap<String,String>();
			for (Attribute no : nominalAttributes) {
				if (!weka.core.Utils.isMissingValue(inst.value(no))) {
					wordCounts.put(no.name(), no.value((int) inst.value(no)));
				}
			}
			// registered once per word, mirroring the numeric branch
			this.nomDict.put(word, wordCounts);
		}
	}
}

Source File: WAODE.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Generates the classifier.
  * <p>
  * Falls back to a ZeroR model when the data holds a single attribute.
  * Otherwise it lays out every non-class attribute value in one flat index
  * space, counts class frequencies, single-value frequencies and value-pair
  * co-occurrences (overall and per class), and finally stores the result of
  * mutualInfo(att) for every non-class attribute.
  *
  * @param instances set of instances serving as training data
  * @throws Exception if the classifier has not been generated successfully
  */
 public void buildClassifier(Instances instances) throws Exception {
   
   // can classifier handle the data?
   getCapabilities().testWithFail(instances);

   // only class? -> build ZeroR model
   if (instances.numAttributes() == 1) {
     System.err.println(
  "Cannot build model (only class attribute present in data!), "
  + "using ZeroR model instead!");
     m_ZeroR = new weka.classifiers.rules.ZeroR();
     m_ZeroR.buildClassifier(instances);
     return;
   }
   else {
     // clear any fallback model left over from an earlier build
     m_ZeroR = null;
   }

   // reset variable
   m_NumClasses = instances.numClasses();
   m_ClassIndex = instances.classIndex();
   m_NumAttributes = instances.numAttributes();
   m_NumInstances = instances.numInstances();
   m_TotalAttValues = 0;
   
   // allocate space for attribute reference arrays
   m_StartAttIndex = new int[m_NumAttributes];
   m_NumAttValues = new int[m_NumAttributes];
   
   // set the starting index of each attribute and the number of values for
   // each attribute and the total number of values for all attributes (not including class).
   // The class attribute gets start index -1 and m_NumClasses as its value count.
   for (int i = 0; i < m_NumAttributes; i++) {
     if (i != m_ClassIndex) {
m_StartAttIndex[i] = m_TotalAttValues;
m_NumAttValues[i] = instances.attribute(i).numValues();
m_TotalAttValues += m_NumAttValues[i];
     }
     else {
m_StartAttIndex[i] = -1;
m_NumAttValues[i] = m_NumClasses;
     }
   }
   
   // allocate space for counts and frequencies
   m_ClassCounts = new double[m_NumClasses];
   m_AttCounts = new double[m_TotalAttValues];
   m_AttAttCounts = new double[m_TotalAttValues][m_TotalAttValues];
   m_ClassAttAttCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues];
   // header-only copy of the training data
   m_Header = new Instances(instances, 0);
   
   // Calculate the counts
   // NOTE(review): the (int) casts below turn a NaN value into 0 —
   // confirm that missing values cannot reach this point.
   for (int k = 0; k < m_NumInstances; k++) {
     int classVal=(int)instances.instance(k).classValue();
     m_ClassCounts[classVal] ++;
     // attIndex[i] = flat index of the value instance k takes on attribute i (-1 for the class)
     int[] attIndex = new int[m_NumAttributes];
     for (int i = 0; i < m_NumAttributes; i++) {
if (i == m_ClassIndex){
  attIndex[i] = -1;
}
else{
  attIndex[i] = m_StartAttIndex[i] + (int)instances.instance(k).value(i);
  m_AttCounts[attIndex[i]]++;
}
     }
     // count every ordered pair of non-class attribute values, including an attribute paired with itself
     for (int Att1 = 0; Att1 < m_NumAttributes; Att1++) {
if (attIndex[Att1] == -1) continue;
for (int Att2 = 0; Att2 < m_NumAttributes; Att2++) {
  if ((attIndex[Att2] != -1)) {
    m_AttAttCounts[attIndex[Att1]][attIndex[Att2]] ++;
    m_ClassAttAttCounts[classVal][attIndex[Att1]][attIndex[Att2]] ++;
  }
}
     }
   }
   
   //compute mutual information between each attribute and class
   // (the class attribute's own slot keeps the array default of 0.0)
   m_mutualInformation=new double[m_NumAttributes];
   for (int att=0;att<m_NumAttributes;att++){
     if (att == m_ClassIndex) continue;
     m_mutualInformation[att]=mutualInfo(att);
   }
 }

Source File: ARAMNetworkSparse.java From meka with GNU General Public License v3.0

4 votes

/**
  * Generates the classifier.
  * <p>
  * On the first call ({@code numFeatures == -1}) the network is initialised via
  * {@code initARAM}. On later calls the method returns without training when the
  * feature or class length derived from {@code D} differs from the stored
  * {@code numFeatures} / {@code numClasses}. Instances are presented to
  * {@code updateClassifier} in the order given by {@code order}, which defaults
  * to 0 .. numInstances-1 when it has not been set.
  *
  * @param D set of instances serving as training data
  * @exception Exception if the classifier has not been generated 
  * successfully
  */
 public void buildClassifier(Instances D) throws Exception {

	// NOTE(review): presumably the MEKA convention where classIndex() equals the
	// number of labels, and the factor 2 is complement coding — confirm.
	int L = D.classIndex();
	int featlength = (D.numAttributes() - L) * 2;
	int classlength = L * 2;

	if (this.order == null) {
		order = new ArrayList<Integer>();
		for (int j = 0; j < D.numInstances(); j++) {
			order.add(j);
		}
	}

	if (numFeatures == -1) {
		initARAM(featlength, classlength, roa, threshold);
	} else {
		// dimensions differ from the initialised network: skip training silently
		if (featlength != numFeatures) {
			return;
		}
		if (classlength != numClasses) {
			return;
		}
	}

	// the actual learning happens one instance at a time in updateClassifier
	for (int i = 0; i < D.numInstances(); i++) {
		Instance instance = D.get(order.get(i));
		updateClassifier(instance);
	}
	System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+".");
 }

Source File: ArffLexiconWordLabeller.java From AffectiveTweets with GNU General Public License v3.0

2 votes

/**
 * Loads the ARFF lexicon file and maps every stemmed word to the values of its
 * numeric and nominal attributes.
 * <p>
 * The numeric and nominal attributes (other than the word attribute) are
 * collected in {@code attributes}; for each instance whose word attribute is
 * a string, the per-attribute values are stored in {@code attValMap} under the
 * stemmed word. No missing-value filtering is applied here: values are stored
 * exactly as {@code Instance.value} returns them.
 *
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
	Instances lexInstances;
	// try-with-resources releases the file handle even when parsing fails
	try (BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile))) {
		lexInstances = new Instances(reader);
	}

	// set upper value for word index
	lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);
	final int wordIndex = this.lexiconWordIndex.getIndex();

	// checks all numeric and nominal attributes and discards the word attribute
	for (int i = 0; i < lexInstances.numAttributes(); i++) {
		if (i == wordIndex) {
			continue;
		}
		Attribute att = lexInstances.attribute(i);
		if (att.isNumeric() || att.isNominal()) {
			this.attributes.add(att);
		}
	}

	// Maps all words with their attribute values
	for (Instance inst : lexInstances) {
		if (!inst.attribute(wordIndex).isString()) {
			continue;
		}

		// stems the word
		String word = this.m_stemmer.stem(inst.stringValue(wordIndex));

		// nothing is stored for a lexicon without numeric/nominal attributes
		if (!attributes.isEmpty()) {
			Map<Attribute,Double> wordVals = new HashMap<Attribute,Double>();
			for (Attribute na : attributes) {
				wordVals.put(na, inst.value(na));
			}
			this.attValMap.put(word, wordVals);
		}
	}
}

Java Code Examples for weka.core.Instances#numAttributes()