Java Code Examples for weka.core.Instances#sort()
The following examples show how to use weka.core.Instances#sort().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BinC45Split.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Creates a C4.5-type split on the given data. * * @exception Exception if something goes wrong */ public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0; m_splitPoint = Double.MAX_VALUE; m_infoGain = 0; m_gainRatio = 0; // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()){ handleEnumeratedAttribute(trainInstances); }else{ trainInstances.sort(trainInstances.attribute(m_attIndex)); handleNumericAttribute(trainInstances); } }
Example 2
Source File: NBTreeSplit.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Creates a NBTree-type split on the given data. Assumes that none of * the class values is missing. * * @exception Exception if something goes wrong */ public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0; m_splitPoint = Double.MAX_VALUE; m_errors = 0; if (m_globalNB != null) { m_errors = m_globalNB.getErrors(); } // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()) { m_complexityIndex = trainInstances.attribute(m_attIndex).numValues(); handleEnumeratedAttribute(trainInstances); }else{ m_complexityIndex = 2; trainInstances.sort(trainInstances.attribute(m_attIndex)); handleNumericAttribute(trainInstances); } }
Example 3
Source File: C45Split.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Creates a C4.5-type split on the given data. Assumes that none of * the class values is missing. * * @exception Exception if something goes wrong */ public void buildClassifier(Instances trainInstances) throws Exception { // Initialize the remaining instance variables. m_numSubsets = 0; m_splitPoint = Double.MAX_VALUE; m_infoGain = 0; m_gainRatio = 0; // Different treatment for enumerated and numeric // attributes. if (trainInstances.attribute(m_attIndex).isNominal()) { m_complexityIndex = trainInstances.attribute(m_attIndex).numValues(); m_index = m_complexityIndex; handleEnumeratedAttribute(trainInstances); }else{ m_complexityIndex = 2; m_index = 0; trainInstances.sort(trainInstances.attribute(m_attIndex)); handleNumericAttribute(trainInstances); } }
Example 4
Source File: Discretize.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Set cutpoints for a single attribute using MDL. * * @param index the index of the attribute to set cutpoints for * @param data the data to work with */ protected void calculateCutPointsByMDL(int index, Instances data) { // Sort instances data.sort(data.attribute(index)); // Find first instances that's missing int firstMissing = data.numInstances(); for (int i = 0; i < data.numInstances(); i++) { if (data.instance(i).isMissing(index)) { firstMissing = i; break; } } m_CutPoints[index] = cutPointsForSubset(data, index, 0, firstMissing); }
Example 5
Source File: Sampling.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Reorder the dataset by its largest class * @param data * @return */ public static Instances orderByLargestClass(Instances data) { Instances newData = new Instances(data, data.numInstances()); // get the number of class in the data int nbClass = data.numClasses(); int[] instancePerClass = new int[nbClass]; int[] labels = new int[nbClass]; int[] classIndex = new int[nbClass]; // sort the data base on its class data.sort(data.classAttribute()); // get the number of instances per class in the data for (int i = 0; i < nbClass; i++) { instancePerClass[i] = data.attributeStats(data.classIndex()).nominalCounts[i]; labels[i] = i; if (i > 0) classIndex[i] = classIndex[i-1] + instancePerClass[i-1]; } QuickSort.sort(instancePerClass, labels); for (int i = nbClass-1; i >=0 ; i--) { for (int j = 0; j < instancePerClass[i]; j++) { newData.add(data.instance(classIndex[labels[i]] + j)); } } return newData; }
Example 6
Source File: Segmenter.java From gsn with GNU General Public License v3.0 | 5 votes |
/**
 * Builds a segmented classifier on the given data and stores its errors.
 *
 * <p>The base classifier is obtained from {@code Tools.getClassifierById}
 * using the {@code model} field. After the segmented classifier has been
 * built, the data is sorted on attribute 0 (in place) and the result of
 * {@code Tools.get_errors} is stored in {@code Pred_errors}.
 *
 * @param i the data to build the classifier on; sorted in place on attribute 0
 * @param seg the segment definition passed to the segmented classifier
 * @return the built segmented classifier
 * @throws Exception if building the classifier or computing the errors fails
 */
public SegmentedClassifier computeErrors(Instances i, Double[] seg) throws Exception {
  final Classifier baseClassifier = Tools.getClassifierById(model);

  final Filter inputFilter = new DummyFilter();
  inputFilter.setInputFormat(i);

  final SegmentedClassifier segmented =
      new SegmentedClassifier(baseClassifier, 1, seg, inputFilter);
  segmented.buildClassifier(i);

  // The errors are computed on the data ordered by its first attribute.
  i.sort(0);
  Pred_errors = Tools.get_errors(segmented, i);

  return segmented;
}
Example 7
Source File: Sampling.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Reorder the data by compactness of each class using Euclidean distance * @param data * @return */ public static Instances orderByCompactClass(Instances data) { Instances newData = new Instances(data, data.numInstances()); // get the number of class in the data int nbClass = data.numClasses(); int[] instancePerClass = new int[nbClass]; int[] labels = new int[nbClass]; int[] classIndex = new int[nbClass]; double[] compactness = new double[nbClass]; // sort the data base on its class data.sort(data.classAttribute()); int start = 0; // get the number of instances per class in the data for (int i = 0; i < nbClass; i++) { instancePerClass[i] = data.attributeStats(data.classIndex()).nominalCounts[i]; labels[i] = i; if (i > 0) classIndex[i] = classIndex[i-1] + instancePerClass[i-1]; int end = start + instancePerClass[i]; int counter = 0; double[][] dataPerClass = new double[instancePerClass[i]][data.numAttributes()-1]; for (int j = start; j < end; j++) { dataPerClass[counter++] = data.instance(j).toDoubleArray(); } double[] mean = arithmeticMean(dataPerClass); double d = 0; for (int j = 0; j < instancePerClass[i]; j++) { double temp = euclideanDistance(mean, dataPerClass[j]); temp *= temp; temp -= (mean[0] - dataPerClass[j][0]) * (mean[0] - dataPerClass[j][0]); d += temp; } compactness[i] = d / instancePerClass[i]; start = end; } QuickSort.sort(compactness, labels); for (int i = nbClass-1; i >=0 ; i--) { for (int j = 0; j < instancePerClass[labels[i]]; j++) { newData.add(data.instance(classIndex[labels[i]] + j)); } } return newData; }
Example 8
Source File: ResidualSplit.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Selects split point for numeric attribute. */ protected boolean getSplitPoint() throws Exception{ //compute possible split points double[] splitPoints = new double[m_numInstances]; int numSplitPoints = 0; Instances sortedData = new Instances(m_data); sortedData.sort(sortedData.attribute(m_attIndex)); double last, current; last = sortedData.instance(0).value(m_attIndex); for (int i = 0; i < m_numInstances - 1; i++) { current = sortedData.instance(i+1).value(m_attIndex); if (!Utils.eq(current, last)){ splitPoints[numSplitPoints++] = (last + current) / 2.0; } last = current; } //compute entropy for all split points double[] entropyGain = new double[numSplitPoints]; for (int i = 0; i < numSplitPoints; i++) { m_splitPoint = splitPoints[i]; entropyGain[i] = entropyGain(); } //get best entropy gain int bestSplit = -1; double bestGain = -Double.MAX_VALUE; for (int i = 0; i < numSplitPoints; i++) { if (entropyGain[i] > bestGain) { bestGain = entropyGain[i]; bestSplit = i; } } if (bestSplit < 0) return false; m_splitPoint = splitPoints[bestSplit]; return true; }
Example 9
Source File: IsotonicRegression.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Does the actual regression. */ protected void regress(Attribute attribute, Instances insts, boolean ascending) throws Exception { // Sort values according to current attribute insts.sort(attribute); // Initialize arrays double[] values = new double[insts.numInstances()]; double[] weights = new double[insts.numInstances()]; double[] cuts = new double[insts.numInstances() - 1]; int size = 0; values[0] = insts.instance(0).classValue(); weights[0] = insts.instance(0).weight(); for (int i = 1; i < insts.numInstances(); i++) { if (insts.instance(i).value(attribute) > insts.instance(i - 1).value(attribute)) { cuts[size] = (insts.instance(i).value(attribute) + insts.instance(i - 1).value(attribute)) / 2; size++; } values[size] += insts.instance(i).classValue(); weights[size] += insts.instance(i).weight(); } size++; // While there is a pair of adjacent violators boolean violators; do { violators = false; // Initialize arrays double[] tempValues = new double[size]; double[] tempWeights = new double[size]; double[] tempCuts = new double[size - 1]; // Merge adjacent violators int newSize = 0; tempValues[0] = values[0]; tempWeights[0] = weights[0]; for (int j = 1; j < size; j++) { if ((ascending && (values[j] / weights[j] > tempValues[newSize] / tempWeights[newSize])) || (!ascending && (values[j] / weights[j] < tempValues[newSize] / tempWeights[newSize]))) { tempCuts[newSize] = cuts[j - 1]; newSize++; tempValues[newSize] = values[j]; tempWeights[newSize] = weights[j]; } else { tempWeights[newSize] += weights[j]; tempValues[newSize] += values[j]; violators = true; } } newSize++; // Copy references values = tempValues; weights = tempWeights; cuts = tempCuts; size = newSize; } while (violators); // Compute actual predictions for (int i = 0; i < size; i++) { values[i] /= weights[i]; } // Backup best instance variables Attribute attributeBackedup = m_attribute; double[] cutsBackedup = m_cuts; double[] valuesBackedup = m_values; // Set instance variables to values computed for 
this attribute m_attribute = attribute; m_cuts = cuts; m_values = values; // Compute sum of squared errors Evaluation eval = new Evaluation(insts); eval.evaluateModel(this, insts); double msq = eval.rootMeanSquaredError(); // Check whether this is the best attribute if (msq < m_minMsq) { m_minMsq = msq; } else { m_attribute = attributeBackedup; m_cuts = cutsBackedup; m_values = valuesBackedup; } }
Example 10
Source File: Discretize.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Set cutpoints for a single attribute. * * @param index the index of the attribute to set cutpoints for */ protected void calculateCutPointsByEqualFrequencyBinning(int index) { // Copy data so that it can be sorted Instances data = new Instances(getInputFormat()); // Sort input data data.sort(index); // Compute weight of instances without missing values double sumOfWeights = 0; for (int i = 0; i < data.numInstances(); i++) { if (data.instance(i).isMissing(index)) { break; } else { sumOfWeights += data.instance(i).weight(); } } double freq; double[] cutPoints = new double[m_NumBins - 1]; if (getDesiredWeightOfInstancesPerInterval() > 0) { freq = getDesiredWeightOfInstancesPerInterval(); cutPoints = new double[(int)(sumOfWeights / freq)]; } else { freq = sumOfWeights / m_NumBins; cutPoints = new double[m_NumBins - 1]; } // Compute break points double counter = 0, last = 0; int cpindex = 0, lastIndex = -1; for (int i = 0; i < data.numInstances() - 1; i++) { // Stop if value missing if (data.instance(i).isMissing(index)) { break; } counter += data.instance(i).weight(); sumOfWeights -= data.instance(i).weight(); // Do we have a potential breakpoint? if (data.instance(i).value(index) < data.instance(i + 1).value(index)) { // Have we passed the ideal size? if (counter >= freq) { // Is this break point worse than the last one? 
if (((freq - last) < (counter - freq)) && (lastIndex != -1)) { cutPoints[cpindex] = (data.instance(lastIndex).value(index) + data.instance(lastIndex + 1).value(index)) / 2; counter -= last; last = counter; lastIndex = i; } else { cutPoints[cpindex] = (data.instance(i).value(index) + data.instance(i + 1).value(index)) / 2; counter = 0; last = 0; lastIndex = -1; } cpindex++; freq = (sumOfWeights + counter) / ((cutPoints.length + 1) - cpindex); } else { lastIndex = i; last = counter; } } } // Check whether there was another possibility for a cut point if ((cpindex < cutPoints.length) && (lastIndex != -1)) { cutPoints[cpindex] = (data.instance(lastIndex).value(index) + data.instance(lastIndex + 1).value(index)) / 2; cpindex++; } // Did we find any cutpoints? if (cpindex == 0) { m_CutPoints[index] = null; } else { double[] cp = new double[cpindex]; for (int i = 0; i < cpindex; i++) { cp[i] = cutPoints[i]; } m_CutPoints[index] = cp; } }
Example 11
Source File: PropositionalToMultiInstance.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * If the filter requires all instances prior to filtering, * output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } Instances input = getInputFormat(); input.sort(0); // make sure that bagID is sorted Instances output = getOutputFormat(); Instances bagInsts = output.attribute(1).relation(); Instance inst = new DenseInstance(bagInsts.numAttributes()); inst.setDataset(bagInsts); double bagIndex = input.instance(0).value(0); double classValue = input.instance(0).classValue(); double bagWeight = 0.0; // Convert pending input instances for(int i = 0; i < input.numInstances(); i++) { double currentBagIndex = input.instance(i).value(0); // copy the propositional instance value, except the bagIndex and the class value for (int j = 0; j < input.numAttributes() - 2; j++) inst.setValue(j, input.instance(i).value(j + 1)); inst.setWeight(input.instance(i).weight()); if (currentBagIndex == bagIndex){ bagInsts.add(inst); bagWeight += inst.weight(); } else{ addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight); bagInsts = bagInsts.stringFreeStructure(); bagInsts.add(inst); bagIndex = currentBagIndex; classValue = input.instance(i).classValue(); bagWeight = inst.weight(); } } // reach the last instance, create and add the last bag addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight); if (getRandomize()) output.randomize(new Random(getSeed())); for (int i = 0; i < output.numInstances(); i++) push(output.instance(i)); // Free memory flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
Example 12
Source File: SubSample.java From gsn with GNU General Public License v3.0 | 4 votes |
@Override protected Instances process(Instances instances) throws Exception { instances.sort(m_index); Instances output = new Instances(instances); if(instances.numInstances() <= m_ratio){return output;} for(int i=output.numInstances()-1;i>=0;i--){ if((i+1) % m_ratio != 0){output.delete(i);} } //output.compactify(); return output; }