weka.core.AttributeStats Java Examples

Source File: Cobweb.java From tsml with GNU General Public License v3.0

5 votes

/**
    * Adds the supplied instance to, or removes it from, the per-attribute
    * statistics. The statistics array is allocated on first use.
    *
    * @param updateInstance the instance whose values and weight are applied
    * @param delete true if the contribution of the supplied instance is
    * to be taken out of the statistics, false if it is to be added
    */
   protected void updateStats(Instance updateInstance, 
                              boolean delete) {

     final double weight = updateInstance.weight();
     // positive when adding, negative when removing
     final double signedWeight = delete ? (-1.0 * weight) : weight;

     // first call: set up one stats holder per attribute
     if (m_attStats == null) {
       m_attStats = new AttributeStats[m_numAttributes];
       for (int att = 0; att < m_numAttributes; att++) {
         AttributeStats fresh = new AttributeStats();
         if (m_clusterInstances.attribute(att).isNominal()) {
           fresh.nominalCounts =
             new int [m_clusterInstances.attribute(att).numValues()];
         } else {
           fresh.numericStats = new Stats();
         }
         m_attStats[att] = fresh;
       }
     }

     for (int att = 0; att < m_numAttributes; att++) {
       // missing values contribute nothing
       if (updateInstance.isMissing(att)) {
         continue;
       }
       double value = updateInstance.value(att);
       AttributeStats current = m_attStats[att];
       if (m_clusterInstances.attribute(att).isNominal()) {
         // nominalCounts is an int array, so the compound assignment
         // narrows the weighted sum back to int
         current.nominalCounts[(int)value] += signedWeight;
         current.totalCount += signedWeight;
       } else if (delete) {
         current.numericStats.subtract(value, weight);
       } else {
         current.numericStats.add(value, weight);
       }
     }

     m_totalInstances += signedWeight;
   }

Source File: Chopper.java From collective-classification-weka-package with GNU General Public License v3.0

5 votes

/**
 * builds the classifier: determines the distribution of the first two class
 * labels in the training set, derives the number of test instances per
 * iteration and then runs the fold loop.
 * 
 * @throws Exception	if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats        stats;
  int                   i;
  
  // determine class distribution
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++) {
    // counts are ints: divide in floating point, otherwise the relative
    // frequency is truncated to 0 or 1; an empty class count yields 0
    // instead of an integer division by zero
    if (stats.totalCount > 0)
      m_ClassDistribution[i] =   (double) stats.nominalCounts[i] 
                               / (double) stats.totalCount;
    else
      m_ClassDistribution[i] = 0;
  }

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration =   (double) m_Testset.numInstances() 
                            / getFolds();
  if (getDebug())
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);

  // build classifier (the loop runs from fold 0 up to and including getFolds())
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose() || getDebug()) {
      if (getCutOff() > 0)
        System.out.println(   "\nFold " + i + "/" + getFolds() 
                            + " (CutOff at " + getCutOff() + ")");
      else
        System.out.println("\nFold " + i + "/" + getFolds());
    }
    buildTrainSet(i);
    buildClassifier();
    
    // cutoff of folds reached? (never triggers on fold 0)
    if ( (i > 0) && (i == getCutOff()) )
      break;
  }
}

Source File: DecisionTreeNode.java From collective-classification-weka-package with GNU General Public License v3.0

5 votes

/**
 * derives the class probabilities from the class label frequencies of the
 * given data (count of each label divided by the sum of all label counts)
 * 
 * @param data	the data to get the class probabilities from
 */
public void setClassProbabilities(Instances data) {
  final int[] labelCounts = data.attributeStats(data.classIndex()).nominalCounts;
  // widened to double once so that the division below is floating point
  final double labelTotal = Utils.sum(labelCounts);

  m_ClassProbs = new double[data.classAttribute().numValues()];
  for (int label = 0; label < m_ClassProbs.length; label++) {
    m_ClassProbs[label] = labelCounts[label] / labelTotal;
  }
}

Source File: CollectiveInstances.java From collective-classification-weka-package with GNU General Public License v3.0

5 votes

/**
 * clears the class labels of the given range of instances and then assigns
 * each of them one of the first two class labels at random, using the share
 * of the first class label in the training set as the probability for it
 * @param train       the training instances to retrieve the class
 *                    distribution from
 * @param instances   the instances to initialize
 * @param from        the first instance to initialize
 * @param count       the number of instances to initialize
 * @return            the initialized instances (same object as passed in)
 * @throws Exception  if something goes wrong
 */
public Instances initializeLabels( Instances train, Instances instances, 
                                   int from, int count )
  throws Exception {
    
  final int end = from + count;
  
  // reset flip count
  m_FlippedLabels = 0;
  
  // explicitly set labels to "missing"
  for (int idx = from; idx < end; idx++)
    instances.instance(idx).setClassMissing();
  
  // share of the first class label among all counted class values
  final AttributeStats classStats = train.attributeStats(train.classIndex());
  final double firstClassShare =   (double) classStats.nominalCounts[0] 
                                 / (double) classStats.totalCount;
  
  // set labels
  final Attribute classAttribute = instances.attribute(instances.classIndex());
  for (int idx = from; idx < end; idx++) {
    // random class: label 0 with probability firstClassShare, label 1 otherwise
    final int label = (m_Random.nextDouble() < firstClassShare) ? 0 : 1;
    instances.instance(idx).setClassValue(classAttribute.value(label));
  }

  return instances;
}

Source File: CobWeb.java From moa with GNU General Public License v3.0

5 votes

/**
 * Applies the supplied instance to the per-attribute statistics, either
 * adding its weighted values or taking them out again. The statistics
 * array is created the first time this method runs.
 *
 * @param updateInstance the instance whose values and weight are applied
 * @param delete true if the values of the supplied instance are
 * to be removed from the statistics, false if they are to be added
 */
protected void updateStats(Instance updateInstance,
        boolean delete) {

    final double w = updateInstance.weight();
    // signed contribution: +weight on insert, -weight on delete
    final double delta = delete ? (-1.0 * w) : w;

    // lazy allocation of one stats holder per attribute
    if (m_attStats == null) {
        m_attStats = new AttributeStats[m_numAttributes];
        for (int a = 0; a < m_numAttributes; a++) {
            AttributeStats holder = new AttributeStats();
            if (m_clusterInstances.attribute(a).isNominal()) {
                holder.nominalCounts =
                        new int[m_clusterInstances.attribute(a).numValues()];
            } else {
                holder.numericStats = new Stats();
            }
            m_attStats[a] = holder;
        }
    }

    for (int a = 0; a < m_numAttributes; a++) {
        // skip attributes the instance has no value for
        if (updateInstance.isMissing(a)) {
            continue;
        }
        double value = updateInstance.value(a);
        AttributeStats holder = m_attStats[a];
        if (m_clusterInstances.attribute(a).isNominal()) {
            // int array target: the compound assignment narrows the
            // weighted result back to int
            holder.nominalCounts[(int) value] += delta;
            holder.totalCount += delta;
        } else if (delete) {
            holder.numericStats.subtract(value, w);
        } else {
            holder.numericStats.add(value, w);
        }
    }

    m_totalInstances += delta;
}

Source File: Apriori.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Drops every column whose values are all missing. While scanning the
 * columns, the largest single value count is tracked as long as the upper
 * bound for the minimum support is still 1.0, and that bound is lowered
 * afterwards if no value occurs in every instance.
 * 
 * @param instances the instances
 * @return a new set of instances with all missing columns removed, or the
 *         given instances if there is nothing to remove
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances)
    throws Exception {

  final int total = instances.numInstances();
  // comma-separated, 1-based indices of the columns to drop
  final StringBuilder allMissing = new StringBuilder();
  int dropped = 0;
  int highestCount = 0;

  for (int col = 0; col < instances.numAttributes(); col++) {
    AttributeStats colStats = instances.attributeStats(col);

    if (m_upperBoundMinSupport == 1.0 && highestCount != total) {
      // see if we can decrease this by looking for the most frequent value
      int[] counts = colStats.nominalCounts;
      int top = counts[Utils.maxIndex(counts)];
      if (top > highestCount) {
        highestCount = top;
      }
    }

    if (colStats.missingCount != total) {
      continue;
    }

    if (allMissing.length() > 0) {
      allMissing.append(',');
    }
    allMissing.append(col + 1);
    dropped++;
  }

  if (m_verbose) {
    System.err.println("Removed : " + dropped
        + " columns with all missing values.");
  }

  if (m_upperBoundMinSupport == 1.0 && highestCount != total) {
    m_upperBoundMinSupport = (double) highestCount / (double) total;
    if (m_verbose) {
      System.err.println("Setting upper bound min support to : "
          + m_upperBoundMinSupport);
    }
  }

  if (allMissing.length() == 0) {
    return instances;
  }

  Remove af = new Remove();
  af.setAttributeIndices(allMissing.toString());
  af.setInvertSelection(false);
  af.setInputFormat(instances);
  return Filter.useFilter(instances, af);
}

Source File: RemoveUseless.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Signals the end of the current input batch. On the first batch the
  * attributes to drop are determined (all values missing, fewer than two
  * distinct values, or nominal with too many distinct values relative to
  * the non-missing count), the internal Remove filter is set up and the
  * buffered instances are pushed through it.
  *
  * @return true if there are instances pending output
  * @throws Exception if no input format defined
  */  
 public boolean batchFinished() throws Exception {

   if (getInputFormat() == null) {
     throw new IllegalStateException("No input instance format defined");
   }

   if (m_removeFilter == null) {

     // establish attributes to remove from first batch
     Instances firstBatch = getInputFormat();
     int[] candidates = new int[firstBatch.numAttributes()];
     int candidateCount = 0;

     for (int att = 0; att < firstBatch.numAttributes(); att++) {
       if (att == firstBatch.classIndex()) {
         continue; // skip class
       }
       AttributeStats attStats = firstBatch.attributeStats(att);

       // entirely missing or constant attributes always go
       boolean useless = (attStats.missingCount == firstBatch.numInstances())
         || (attStats.distinctCount < 2);

       if (!useless && firstBatch.attribute(att).isNominal()) {
         // remove nominal attributes that vary too much
         double variancePercent = (double) attStats.distinctCount
           / (double)(attStats.totalCount - attStats.missingCount) * 100.0;
         useless = variancePercent > m_maxVariancePercentage;
       }

       if (useless) {
         candidates[candidateCount++] = att;
       }
     }

     // trim the index buffer to the number of attributes actually selected
     int[] toDelete = new int[candidateCount];
     System.arraycopy(candidates, 0, toDelete, 0, candidateCount);

     m_removeFilter = new Remove();
     m_removeFilter.setAttributeIndicesArray(toDelete);
     m_removeFilter.setInvertSelection(false);
     m_removeFilter.setInputFormat(firstBatch);

     for (int row = 0; row < firstBatch.numInstances(); row++) {
       m_removeFilter.input(firstBatch.instance(row));
     }
     m_removeFilter.batchFinished();

     Instances outputDataset = m_removeFilter.getOutputFormat();

     // restore old relation name to hide attribute filter stamp
     outputDataset.setRelationName(firstBatch.relationName());

     setOutputFormat(outputDataset);
     for (Instance processed = m_removeFilter.output();
          processed != null;
          processed = m_removeFilter.output()) {
       processed.setDataset(outputDataset);
       push(processed);
     }
   }
   flushInput();

   m_NewBatch = true;
   return (numPendingOutput() != 0);
 }

Source File: RemoveFrequentValues.java From tsml with GNU General Public License v3.0

4 votes

/**
 * determines the values to retain, it is always at least 1
 * and up to the maximum number of distinct values. The set of
 * retained values is reset on every call; if no instances are
 * given it is simply left empty.
 * 
 * @param inst the Instances to determine the values from which are kept  
 */
public void determineValues(Instances inst) {
   int       i;
   int       attIdx;
   int       min;
   int       max;
   int       count;
   int[]     counts;
   int[]     sorted;

   // init names
   m_Values = new HashSet();
   
   // must be checked before inst is dereferenced below
   if (inst == null)
      return;
   
   m_AttIndex.setUpper(inst.numAttributes() - 1);
   attIdx = m_AttIndex.getIndex();
   
   // number of values to retain
   counts = inst.attributeStats(attIdx).nominalCounts;
   if (m_Invert)
      count = counts.length - m_NumValues;
   else
      count = m_NumValues;
   // out of bounds? -> fix
   if (count < 1)
      count = 1;  // at least one value!
   if (count > counts.length)
      count = counts.length;  // at max the existing values
   
   // determine min/max occurrences on a sorted copy, so that the counts
   // stay aligned with the value indices of the attribute
   sorted = counts.clone();
   Arrays.sort(sorted);
   if (m_LeastValues) {
      min = sorted[0];
      max = sorted[count - 1];
   }
   else {
      min = sorted[sorted.length - count];
      max = sorted[sorted.length - 1];
   }
   
   // add values if they are inside min/max (incl. borders) and not more than count
   for (i = 0; i < counts.length; i++) {
      if ( (counts[i] >= min) && (counts[i] <= max) && (m_Values.size() < count) )
         m_Values.add(inst.attribute(attIdx).value(i));
   }
}

Source File: CollectiveForest.java From collective-classification-weka-package with GNU General Public License v3.0

4 votes

/**
 * here initialization and building, possible iterations will happen:
 * the number of features, the distribution of the first two class labels
 * and the number of test instances per iteration are determined, the
 * ranked list of test instances is created and the fold loop is run.
 * 
 * @throws Exception	if something goes wrong
 */
@Override
protected void build() throws Exception {
  AttributeStats        stats;
  int                   i;
  
  // determine number of features to be selected
  // (falls back to log2(#attributes) + 1 if none were specified)
  m_KValue = getNumFeatures();
  if (m_KValue < 1) 
    m_KValue = (int) Utils.log2(m_Trainset.numAttributes()) + 1;

  // determine class distribution
  m_ClassDistribution = new double[2];
  stats = m_Trainset.attributeStats(m_Trainset.classIndex());
  for (i = 0; i < 2; i++) {
    // counts are ints: divide in floating point, otherwise the relative
    // frequency is truncated to 0 or 1
    if (stats.totalCount > 0)
      m_ClassDistribution[i] =   (double) stats.nominalCounts[i] 
                               / (double) stats.totalCount;
    else
      m_ClassDistribution[i] = 0;
  }

  // the number of instances added to the training set in each iteration
  m_InstancesPerIteration =   (double) m_Testset.numInstances() 
                            / getFolds();
  if (getDebug())
    System.out.println("InstancesPerIteration: " + m_InstancesPerIteration);

  // build list of sorted test instances
  m_List = new RankedList(m_Testset, m_ClassDistribution);

  // build classifier (the loop runs from fold 0 up to and including getFolds())
  m_Random = new Random(getSeed());
  for (i = 0; i <= getFolds(); i++) {
    if (getVerbose()) {
      if (getCutOff() > 0)
        System.out.println(   "\nFold " + i + "/" + getFolds() 
                            + " (CutOff at " + getCutOff() + ")");
      else
        System.out.println("\nFold " + i + "/" + getFolds());
    }
    buildTrainSet(i);
    buildClassifier();
    
    // cutoff of folds reached? (never triggers on fold 0)
    if ( (i > 0) && (i == getCutOff()) )
      break;
  }
}

weka.core.AttributeStats Java Examples