weka.core.AttributeStats Java Examples
The following examples show how to use
weka.core.AttributeStats.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Cobweb.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Adds the supplied instance to, or removes it from, the per-attribute
 * statistics kept by this node.
 *
 * <p>The statistics array is allocated on first use: nominal attributes get
 * a count array with one slot per attribute value, all other attributes get
 * a {@code Stats} accumulator. Missing values in the instance are skipped.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are
 * to be removed from the statistics
 */
protected void updateStats(Instance updateInstance, boolean delete) {
  // lazily create one statistics holder per attribute
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int att = 0; att < m_numAttributes; att++) {
      AttributeStats holder = new AttributeStats();
      m_attStats[att] = holder;
      if (m_clusterInstances.attribute(att).isNominal()) {
        holder.nominalCounts =
          new int[m_clusterInstances.attribute(att).numValues()];
      } else {
        holder.numericStats = new Stats();
      }
    }
  }

  for (int att = 0; att < m_numAttributes; att++) {
    if (updateInstance.isMissing(att)) {
      continue;
    }
    double value = updateInstance.value(att);
    AttributeStats holder = m_attStats[att];

    if (!m_clusterInstances.attribute(att).isNominal()) {
      if (delete) {
        holder.numericStats.subtract(value, updateInstance.weight());
      } else {
        holder.numericStats.add(value, updateInstance.weight());
      }
      continue;
    }

    // the compound assignments narrow the (possibly fractional) weight
    // back to the integer type of the counters
    double signedWeight = delete
      ? (-1.0 * updateInstance.weight())
      : updateInstance.weight();
    holder.nominalCounts[(int) value] += signedWeight;
    holder.totalCount += signedWeight;
  }

  double instanceDelta = delete
    ? (-1.0 * updateInstance.weight())
    : updateInstance.weight();
  m_totalInstances += instanceDelta;
}
Example #2
Source File: Chopper.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/** * builds the classifier * * @throws Exception if something goes wrong */ @Override protected void build() throws Exception { AttributeStats stats; int i; // determine class distribution m_ClassDistribution = new double[2]; stats = m_Trainset.attributeStats(m_Trainset.classIndex()); for (i = 0; i < 2; i++) m_ClassDistribution[i] = stats.nominalCounts[i] / stats.totalCount; // the number of instances added to the training set in each iteration m_InstancesPerIteration = (double) m_Testset.numInstances() / getFolds(); if (getDebug()) System.out.println("InstancesPerIteration: " + m_InstancesPerIteration); // build classifier m_Random = new Random(getSeed()); for (i = 0; i <= getFolds(); i++) { if (getVerbose() || getDebug()) { if (getCutOff() > 0) System.out.println( "\nFold " + i + "/" + getFolds() + " (CutOff at " + getCutOff() + ")"); else System.out.println("\nFold " + i + "/" + getFolds()); } buildTrainSet(i); buildClassifier(); // cutoff of folds reached? if ( (i > 0) && (i == getCutOff()) ) break; } }
Example #3
Source File: DecisionTreeNode.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/**
 * Derives the class probabilities from the class attribute of the given
 * data: each entry is the count of that class value divided by the sum of
 * all class value counts.
 *
 * @param data the data to get the class probabilities from
 */
public void setClassProbabilities(Instances data) {
  final AttributeStats classStats = data.attributeStats(data.classIndex());
  final int[] classCounts = classStats.nominalCounts;

  // sum of all counts, widened once so the division below is floating point
  final double denominator = Utils.sum(classCounts);

  m_ClassProbs = new double[data.classAttribute().numValues()];
  int cls = 0;
  while (cls < m_ClassProbs.length) {
    m_ClassProbs[cls] = classCounts[cls] / denominator;
    cls++;
  }
}
Example #4
Source File: CollectiveInstances.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/** * randomly initializes the class labels in the given set according to the * class distribution in the training set * @param train the training instances to retrieve the class * distribution from * @param instances the instances to initialize * @param from the first instance to initialize * @param count the number of instances to initialize * @return the initialize instances * @throws Exception if something goes wrong */ public Instances initializeLabels( Instances train, Instances instances, int from, int count ) throws Exception { int i; AttributeStats stats; Attribute classAttr; double percentage; // reset flip count m_FlippedLabels = 0; // explicitly set labels to "missing" for (i = from; i < from + count; i++) instances.instance(i).setClassMissing(); // determining the percentage of the first class stats = train.attributeStats(train.classIndex()); percentage = (double) stats.nominalCounts[0] / (double) stats.totalCount; // set lables classAttr = instances.attribute(instances.classIndex()); for (i = from; i < from + count; i++) { // random class if (m_Random.nextDouble() < percentage) instances.instance(i).setClassValue(classAttr.value(0)); else instances.instance(i).setClassValue(classAttr.value(1)); } return instances; }
Example #5
Source File: CobWeb.java From moa with GNU General Public License v3.0 | 5 votes |
/**
 * Updates the attribute statistics of this node with the supplied instance,
 * either adding its values or taking them out again.
 *
 * <p>On the first call the statistics array is created, with a count array
 * for each nominal attribute and a {@code Stats} object for every other
 * attribute. Attribute values that are missing in the instance are ignored.
 *
 * @param updateInstance the instance for updating
 * @param delete true if the values of the supplied instance are
 * to be removed from the statistics
 */
protected void updateStats(Instance updateInstance, boolean delete) {
  if (m_attStats == null) {
    // first use: allocate the per-attribute statistics
    m_attStats = new AttributeStats[m_numAttributes];
    int idx = 0;
    while (idx < m_numAttributes) {
      m_attStats[idx] = new AttributeStats();
      boolean nominal = m_clusterInstances.attribute(idx).isNominal();
      if (nominal) {
        int numValues = m_clusterInstances.attribute(idx).numValues();
        m_attStats[idx].nominalCounts = new int[numValues];
      } else {
        m_attStats[idx].numericStats = new Stats();
      }
      idx++;
    }
  }

  for (int idx = 0; idx < m_numAttributes; idx++) {
    if (updateInstance.isMissing(idx)) {
      continue;
    }
    double value = updateInstance.value(idx);

    if (m_clusterInstances.attribute(idx).isNominal()) {
      // negative weight when removing; the += narrows to the int counters
      double delta = updateInstance.weight();
      if (delete) {
        delta = -1.0 * delta;
      }
      m_attStats[idx].nominalCounts[(int) value] += delta;
      m_attStats[idx].totalCount += delta;
    } else if (delete) {
      m_attStats[idx].numericStats.subtract(value, updateInstance.weight());
    } else {
      m_attStats[idx].numericStats.add(value, updateInstance.weight());
    }
  }

  if (delete) {
    m_totalInstances += (-1.0 * updateInstance.weight());
  } else {
    m_totalInstances += (updateInstance.weight());
  }
}
Example #6
Source File: Apriori.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Removes columns that are all missing from the data * * @param instances the instances * @return a new set of instances with all missing columns removed * @throws Exception if something goes wrong */ protected Instances removeMissingColumns(Instances instances) throws Exception { int numInstances = instances.numInstances(); StringBuffer deleteString = new StringBuffer(); int removeCount = 0; boolean first = true; int maxCount = 0; for (int i = 0; i < instances.numAttributes(); i++) { AttributeStats as = instances.attributeStats(i); if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) { // see if we can decrease this by looking for the most frequent value int[] counts = as.nominalCounts; if (counts[Utils.maxIndex(counts)] > maxCount) { maxCount = counts[Utils.maxIndex(counts)]; } } if (as.missingCount == numInstances) { if (first) { deleteString.append((i + 1)); first = false; } else { deleteString.append("," + (i + 1)); } removeCount++; } } if (m_verbose) { System.err.println("Removed : " + removeCount + " columns with all missing " + "values."); } if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) { m_upperBoundMinSupport = (double) maxCount / (double) numInstances; if (m_verbose) { System.err.println("Setting upper bound min support to : " + m_upperBoundMinSupport); } } if (deleteString.toString().length() > 0) { Remove af = new Remove(); af.setAttributeIndices(deleteString.toString()); af.setInvertSelection(false); af.setInputFormat(instances); Instances newInst = Filter.useFilter(instances, af); return newInst; } return instances; }
Example #7
Source File: RemoveUseless.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws Exception if no input format defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_removeFilter == null) { // establish attributes to remove from first batch Instances toFilter = getInputFormat(); int[] attsToDelete = new int[toFilter.numAttributes()]; int numToDelete = 0; for(int i = 0; i < toFilter.numAttributes(); i++) { if (i==toFilter.classIndex()) continue; // skip class AttributeStats stats = toFilter.attributeStats(i); if (stats.missingCount == toFilter.numInstances()) { attsToDelete[numToDelete++] = i; } else if (stats.distinctCount < 2) { // remove constant attributes attsToDelete[numToDelete++] = i; } else if (toFilter.attribute(i).isNominal()) { // remove nominal attributes that vary too much double variancePercent = (double) stats.distinctCount / (double)(stats.totalCount - stats.missingCount) * 100.0; if (variancePercent > m_maxVariancePercentage) { attsToDelete[numToDelete++] = i; } } } int[] finalAttsToDelete = new int[numToDelete]; System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete); m_removeFilter = new Remove(); m_removeFilter.setAttributeIndicesArray(finalAttsToDelete); m_removeFilter.setInvertSelection(false); m_removeFilter.setInputFormat(toFilter); for (int i = 0; i < toFilter.numInstances(); i++) { m_removeFilter.input(toFilter.instance(i)); } m_removeFilter.batchFinished(); Instance processed; Instances outputDataset = m_removeFilter.getOutputFormat(); // restore old relation name to hide attribute filter stamp outputDataset.setRelationName(toFilter.relationName()); setOutputFormat(outputDataset); while ((processed = m_removeFilter.output()) != null) { processed.setDataset(outputDataset); push(processed); } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); }
Example #8
Source File: RemoveFrequentValues.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * determines the values to retain, it is always at least 1 * and up to the maximum number of distinct values * * @param inst the Instances to determine the values from which are kept */ public void determineValues(Instances inst) { int i; AttributeStats stats; int attIdx; int min; int max; int count; m_AttIndex.setUpper(inst.numAttributes() - 1); attIdx = m_AttIndex.getIndex(); // init names m_Values = new HashSet(); if (inst == null) return; // number of values to retain stats = inst.attributeStats(attIdx); if (m_Invert) count = stats.nominalCounts.length - m_NumValues; else count = m_NumValues; // out of bounds? -> fix if (count < 1) count = 1; // at least one value! if (count > stats.nominalCounts.length) count = stats.nominalCounts.length; // at max the existing values // determine min/max occurences Arrays.sort(stats.nominalCounts); if (m_LeastValues) { min = stats.nominalCounts[0]; max = stats.nominalCounts[count - 1]; } else { min = stats.nominalCounts[(stats.nominalCounts.length - 1) - count + 1]; max = stats.nominalCounts[stats.nominalCounts.length - 1]; } // add values if they are inside min/max (incl. borders) and not more than count stats = inst.attributeStats(attIdx); for (i = 0; i < stats.nominalCounts.length; i++) { if ( (stats.nominalCounts[i] >= min) && (stats.nominalCounts[i] <= max) && (m_Values.size() < count) ) m_Values.add(inst.attribute(attIdx).value(i)); } }
Example #9
Source File: CollectiveForest.java From collective-classification-weka-package with GNU General Public License v3.0 | 4 votes |
/** * here initialization and building, possible iterations will happen * * @throws Exception if something goes wrong */ @Override protected void build() throws Exception { AttributeStats stats; int i; // determine number of features to be selected m_KValue = getNumFeatures(); if (m_KValue < 1) m_KValue = (int) Utils.log2(m_Trainset.numAttributes()) + 1; // determine class distribution m_ClassDistribution = new double[2]; stats = m_Trainset.attributeStats(m_Trainset.classIndex()); for (i = 0; i < 2; i++) { if (stats.totalCount > 0) m_ClassDistribution[i] = stats.nominalCounts[i] / stats.totalCount; else m_ClassDistribution[i] = 0; } // the number of instances added to the training set in each iteration m_InstancesPerIteration = (double) m_Testset.numInstances() / getFolds(); if (getDebug()) System.out.println("InstancesPerIteration: " + m_InstancesPerIteration); // build list of sorted test instances m_List = new RankedList(m_Testset, m_ClassDistribution); // build classifier m_Random = new Random(getSeed()); for (i = 0; i <= getFolds(); i++) { if (getVerbose()) { if (getCutOff() > 0) System.out.println( "\nFold " + i + "/" + getFolds() + " (CutOff at " + getCutOff() + ")"); else System.out.println("\nFold " + i + "/" + getFolds()); } buildTrainSet(i); buildClassifier(); // cutoff of folds reached? if ( (i > 0) && (i == getCutOff()) ) break; } }