Java Code Examples for weka.core.Utils#sort()
The following examples show how to use
weka.core.Utils#sort().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Looks up the instance of a threshold curve whose threshold value is
 * closest to a requested target.
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance whose threshold is closest to the
 *         target, or -1 if the relation name does not match, the curve is
 *         empty, or the target lies outside [0, 1]
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }
  final int count = tcurve.numInstances();
  if (count == 0) {
    return -1;
  }
  if ((threshold < 0) || (threshold > 1.0)) {
    return -1;
  }
  if (count == 1) {
    // Only one candidate, so no search is needed.
    return 0;
  }

  // The threshold values are read from the last attribute of the curve.
  final double[] thresholds = tcurve.attributeToDoubleArray(tcurve.numAttributes() - 1);
  final int[] order = Utils.sort(thresholds);
  return binarySearch(order, thresholds, threshold);
}
Example 2
Source File: MarginCurve.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Builds the cumulative margin distribution for a set of predictions and
 * returns it as a set of Instances with the following attributes:
 * <ul>
 * <li><b>Margin</b>: the margin value (plot as x-coordinate)
 * <li><b>Current</b>: the count of instances with the current margin
 * (plot as y axis)
 * <li><b>Cumulative</b>: the count of instances with margin less than or
 * equal to the current margin (plot as y axis)
 * </ul>
 *
 * @param predictions the predictions to build the curve from
 * @return datapoints as a set of instances, null if no predictions
 *         have been made.
 */
public Instances getCurve(FastVector predictions) {
  if (predictions.size() == 0) {
    return null;
  }

  Instances curve = makeHeader();
  double[] margins = getMargins(predictions);
  int[] order = Utils.sort(margins);

  int currentCount = 0;
  int cumulativeCount = 0;
  // Starting point of the curve at margin -1 with zero counts.
  curve.add(makeInstance(-1, currentCount, cumulativeCount));

  for (int index : order) {
    double margin = margins[index];
    NominalPrediction prediction = (NominalPrediction) predictions.elementAt(index);
    double weight = prediction.weight();
    // Compound assignment narrows the double weight into the int counters.
    cumulativeCount += weight;
    currentCount += weight;
    curve.add(makeInstance(margin, currentCount, cumulativeCount));
    // Every prediction produces its own point, so the per-point count restarts.
    currentCount = 0;
  }
  return curve;
}
Example 3
Source File: EvaluationStatisticsUtils.java From meka with GNU General Public License v3.0 | 6 votes |
/** * Rank Matrix */ public static int[][] rankMatrix(List<EvaluationStatistics> stats, String measurement) { double V[][] = valueMatrix(stats,measurement); int N = V.length; int k = V[0].length; int R[][] = new int[N][k]; for (int i = 0; i < N; i++) { int indices[] = Utils.sort(V[i]); // add 1 to each for (int j = 0; j < k; j++) { R[i][indices[j]] = (j+1); } } return R; }
Example 4
Source File: LFSMethods.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Ranks the attributes of a dataset by evaluating each one on its own as a
 * single-attribute subset.
 *
 * @param data      the dataset whose attributes are ranked
 * @param evaluator the subset evaluator used to score each attribute
 * @param verbose   whether to print the merits and the ranking to stdout
 * @return ranking (integer array) of attributes in data with evaluator
 *         (sorting is NOT stable!)
 * @throws Exception if the evaluator fails
 */
public int[] rankAttributes(Instances data, SubsetEvaluator evaluator, boolean verbose)
    throws Exception {
  if (verbose) {
    System.out.println("Ranking attributes with " + evaluator.getClass().getName());
  }

  final int numAttributes = data.numAttributes();
  final double[] merit = new double[numAttributes];
  final BitSet single = new BitSet(numAttributes);

  for (int att = 0; att < numAttributes; att++) {
    if (att == data.classIndex()) {
      // The class attribute receives the largest value, so it sorts last.
      merit[att] = Double.MAX_VALUE;
    } else {
      single.set(att);
      // Merit is stored negated, so the ascending sort lists high merit first.
      merit[att] -= evaluator.evaluateSubset(single);
      m_evalsTotal++;
      single.clear(att);
    }
    if (verbose) {
      System.out.println(att + ": " + merit[att]);
    }
  }

  final int[] ranking = Utils.sort(merit);
  if (verbose) {
    StringBuilder line = new StringBuilder("Ranking [ ");
    for (int attIndex : ranking) {
      line.append(attIndex).append(" ");
    }
    line.append("]\n");
    System.out.println(line);
  }
  return ranking;
}
Example 5
Source File: NominalToBinary.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes, for every nominal attribute of the input format, the
 * weight-averaged class value of each attribute value, and stores in
 * m_Indices the order of the attribute values sorted by that average.
 */
private void computeAverageClassValues() {
  final int numAttributes = getInputFormat().numAttributes();
  double[][] avgClassValues = new double[numAttributes][0];
  m_Indices = new int[numAttributes][0];

  for (int attIndex = 0; attIndex < numAttributes; attIndex++) {
    Attribute att = getInputFormat().attribute(attIndex);
    if (!att.isNominal()) {
      continue;
    }

    final int numValues = att.numValues();
    double[] averages = new double[numValues];
    double[] counts = new double[numValues];
    avgClassValues[attIndex] = averages;

    // Accumulate weights and weighted class values per attribute value,
    // skipping instances where the class or the attribute is missing.
    final int numInstances = getInputFormat().numInstances();
    for (int i = 0; i < numInstances; i++) {
      Instance current = getInputFormat().instance(i);
      if (current.classIsMissing() || current.isMissing(attIndex)) {
        continue;
      }
      int valueIndex = (int) current.value(attIndex);
      counts[valueIndex] += current.weight();
      averages[valueIndex] += current.weight() * current.classValue();
    }

    double weightedClassSum = Utils.sum(averages);
    double totalWeight = Utils.sum(counts);
    if (Utils.gr(totalWeight, 0)) {
      for (int v = 0; v < numValues; v++) {
        if (Utils.gr(counts[v], 0)) {
          averages[v] /= counts[v];
        } else {
          // Values that never occur fall back to the overall average.
          averages[v] = weightedClassSum / totalWeight;
        }
      }
    }
    m_Indices[attIndex] = Utils.sort(averages);
  }
}
Example 6
Source File: Metrics.java From meka with GNU General Public License v3.0 | 5 votes |
public static double L_RankLoss(int y[], double rpred[]) { // works with missing double[][] aligned = align(y, rpred); y = toIntArray(aligned[0]); rpred = aligned[1]; int r[] = Utils.sort(rpred); return L_RankLoss(y, r); }
Example 7
Source File: Ranker.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Sorts the evaluated attribute list * * @return an array of sorted (highest eval to lowest) attribute indexes * @throws Exception of sorting can't be done. */ public double[][] rankedAttributes () throws Exception { int i, j; if (m_attributeList == null || m_attributeMerit == null) { throw new Exception("Search must be performed before a ranked " + "attribute list can be obtained"); } int[] ranked = Utils.sort(m_attributeMerit); // reverse the order of the ranked indexes double[][] bestToWorst = new double[ranked.length][2]; for (i = ranked.length - 1, j = 0; i >= 0; i--) { bestToWorst[j++][0] = ranked[i]; } // convert the indexes to attribute indexes for (i = 0; i < bestToWorst.length; i++) { int temp = ((int)bestToWorst[i][0]); bestToWorst[i][0] = m_attributeList[temp]; bestToWorst[i][1] = m_attributeMerit[temp]; } if (m_numToSelect > bestToWorst.length) { throw new Exception("More attributes requested than exist in the data"); } if (m_numToSelect <= 0) { if (m_threshold == -Double.MAX_VALUE) { m_calculatedNumToSelect = bestToWorst.length; } else { determineNumToSelectFromThreshold(bestToWorst); } } /* if (m_numToSelect > 0) { determineThreshFromNumToSelect(bestToWorst); } */ return bestToWorst; }
Example 8
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Set the format for the transformed data * @return a set of empty Instances (header only) in the new format * @throws Exception if the output format can't be set */ private Instances setOutputFormat() throws Exception { if (m_eigenvalues == null) { return null; } double cumulative = 0.0; FastVector attributes = new FastVector(); for (int i = m_numAttribs - 1; i >= 0; i--) { StringBuffer attName = new StringBuffer(); // build array of coefficients double[] coeff_mags = new double[m_numAttribs]; for (int j = 0; j < m_numAttribs; j++) coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]); int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs; // this array contains the sorted indices of the coefficients int[] coeff_inds; if (m_numAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_numAttribs]; for (int j=0; j<m_numAttribs; j++) coeff_inds[j] = j; } // build final attName string for (int j = 0; j < num_attrs; j++) { double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]]; if (j > 0 && coeff_value >= 0) attName.append("+"); attName.append(Utils.doubleToString(coeff_value,5,3) +m_trainInstances.attribute(coeff_inds[j]).name()); } if (num_attrs < m_numAttribs) attName.append("..."); attributes.addElement(new Attribute(attName.toString())); cumulative+=m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) { break; } } if (m_hasClass) { attributes.addElement(m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainInstances.relationName()+"_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes()-1); } m_outputNumAtts = outputFormat.numAttributes(); return 
outputFormat; }
Example 9
Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Calculates the n point precision result, which is the precision averaged * over n evenly spaced (w.r.t recall) samples of the curve. * * @param tcurve a previously extracted threshold curve Instances. * @param n the number of points to average over. * @return the n-point precision. */ public static double getNPointPrecision(Instances tcurve, int n) { if (!RELATION_NAME.equals(tcurve.relationName()) || (tcurve.numInstances() == 0)) { return Double.NaN; } int recallInd = tcurve.attribute(RECALL_NAME).index(); int precisInd = tcurve.attribute(PRECISION_NAME).index(); double [] recallVals = tcurve.attributeToDoubleArray(recallInd); int [] sorted = Utils.sort(recallVals); double isize = 1.0 / (n - 1); double psum = 0; for (int i = 0; i < n; i++) { int pos = binarySearch(sorted, recallVals, i * isize); double recall = recallVals[sorted[pos]]; double precis = tcurve.instance(sorted[pos]).value(precisInd); /* System.err.println("Point " + (i + 1) + ": i=" + pos + " r=" + (i * isize) + " p'=" + precis + " r'=" + recall); */ // interpolate figures for non-endpoints while ((pos != 0) && (pos < sorted.length - 1)) { pos++; double recall2 = recallVals[sorted[pos]]; if (recall2 != recall) { double precis2 = tcurve.instance(sorted[pos]).value(precisInd); double slope = (precis2 - precis) / (recall2 - recall); double offset = precis - recall * slope; precis = isize * i * slope + offset; /* System.err.println("Point2 " + (i + 1) + ": i=" + pos + " r=" + (i * isize) + " p'=" + precis2 + " r'=" + recall2 + " p''=" + precis); */ break; } } psum += precis; } return psum / n; }
Example 10
Source File: InterquartileRange.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * computes the thresholds for outliers and extreme values * * @param instances the data to work on */ protected void computeThresholds(Instances instances) { int i; double[] values; int[] sortedIndices; int half; int quarter; double q1; double q2; double q3; m_UpperExtremeValue = new double[m_AttributeIndices.length]; m_UpperOutlier = new double[m_AttributeIndices.length]; m_LowerOutlier = new double[m_AttributeIndices.length]; m_LowerExtremeValue = new double[m_AttributeIndices.length]; m_Median = new double[m_AttributeIndices.length]; m_IQR = new double[m_AttributeIndices.length]; for (i = 0; i < m_AttributeIndices.length; i++) { // non-numeric attribute? if (m_AttributeIndices[i] == NON_NUMERIC) continue; // sort attribute data values = instances.attributeToDoubleArray(m_AttributeIndices[i]); sortedIndices = Utils.sort(values); // determine indices half = sortedIndices.length / 2; quarter = half / 2; if (sortedIndices.length % 2 == 1) { q2 = values[sortedIndices[half]]; } else { q2 = (values[sortedIndices[half]] + values[sortedIndices[half + 1]]) / 2; } if (half % 2 == 1) { q1 = values[sortedIndices[quarter]]; q3 = values[sortedIndices[sortedIndices.length - quarter - 1]]; } else { q1 = (values[sortedIndices[quarter]] + values[sortedIndices[quarter + 1]]) / 2; q3 = (values[sortedIndices[sortedIndices.length - quarter - 1]] + values[sortedIndices[sortedIndices.length - quarter]]) / 2; } // determine thresholds and other values m_Median[i] = q2; m_IQR[i] = q3 - q1; m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i]; m_UpperOutlier[i] = q3 + getOutlierFactor() * m_IQR[i]; m_LowerOutlier[i] = q1 - getOutlierFactor() * m_IQR[i]; m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i]; } }
Example 11
Source File: UnivariateEqualFrequencyHistogramEstimator.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Returns the interval for the given confidence value. * * @param conf the confidence value in the interval [0, 1] * @return the interval */ public double[][] predictIntervals(double conf) { // Update the bandwidth updateBoundariesAndOrWeights(); // Compute minimum and maximum value, and delta double val = Statistics.normalInverse(1.0 - (1.0 - conf) / 2); double min = m_TM.firstKey() - val * m_Width; double max = m_TM.lastKey() + val * m_Width; double delta = (max - min) / m_NumIntervals; // Create array with estimated probabilities double[] probabilities = new double[m_NumIntervals]; double leftVal = Math.exp(logDensity(min)); for (int i = 0; i < m_NumIntervals; i++) { double rightVal = Math.exp(logDensity(min + (i + 1) * delta)); probabilities[i] = 0.5 * (leftVal + rightVal) * delta; leftVal = rightVal; } // Sort array based on area of bin estimates int[] sortedIndices = Utils.sort(probabilities); // Mark the intervals to use double sum = 0; boolean[] toUse = new boolean[probabilities.length]; int k = 0; while ((sum < conf) && (k < toUse.length)){ toUse[sortedIndices[toUse.length - (k + 1)]] = true; sum += probabilities[sortedIndices[toUse.length - (k + 1)]]; k++; } // Don't need probabilities anymore probabilities = null; // Create final list of intervals ArrayList<double[]> intervals = new ArrayList<double[]>(); // The current interval double[] interval = null; // Iterate through kernels boolean haveStartedInterval = false; for (int i = 0; i < m_NumIntervals; i++) { // Should the current bin be used? if (toUse[i]) { // Do we need to create a new interval? if (haveStartedInterval == false) { haveStartedInterval = true; interval = new double[2]; interval[0] = min + i * delta; } // Regardless, we should update the upper boundary interval[1] = min + (i + 1) * delta; } else { // We need to finalize and store the last interval // if necessary. 
if (haveStartedInterval) { haveStartedInterval = false; intervals.add(interval); } } } // Add last interval if there is one if (haveStartedInterval) { intervals.add(interval); } return intervals.toArray(new double[0][0]); }
Example 12
Source File: UnivariateKernelEstimator.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Returns the interval for the given confidence value. * * @param conf the confidence value in the interval [0, 1] * @return the interval */ public double[][] predictIntervals(double conf) { // Update the bandwidth updateWidth(); // Compute minimum and maximum value, and delta double val = Statistics.normalInverse(1.0 - (1.0 - conf) / 2); double min = m_TM.firstKey() - val * m_Width; double max = m_TM.lastKey() + val * m_Width; double delta = (max - min) / m_NumIntervals; // Create array with estimated probabilities double[] probabilities = new double[m_NumIntervals]; double leftVal = Math.exp(logDensity(min)); for (int i = 0; i < m_NumIntervals; i++) { double rightVal = Math.exp(logDensity(min + (i + 1) * delta)); probabilities[i] = 0.5 * (leftVal + rightVal) * delta; leftVal = rightVal; } // Sort array based on area of bin estimates int[] sortedIndices = Utils.sort(probabilities); // Mark the intervals to use double sum = 0; boolean[] toUse = new boolean[probabilities.length]; int k = 0; while ((sum < conf) && (k < toUse.length)){ toUse[sortedIndices[toUse.length - (k + 1)]] = true; sum += probabilities[sortedIndices[toUse.length - (k + 1)]]; k++; } // Don't need probabilities anymore probabilities = null; // Create final list of intervals ArrayList<double[]> intervals = new ArrayList<double[]>(); // The current interval double[] interval = null; // Iterate through kernels boolean haveStartedInterval = false; for (int i = 0; i < m_NumIntervals; i++) { // Should the current bin be used? if (toUse[i]) { // Do we need to create a new interval? if (haveStartedInterval == false) { haveStartedInterval = true; interval = new double[2]; interval[0] = min + i * delta; } // Regardless, we should update the upper boundary interval[1] = min + (i + 1) * delta; } else { // We need to finalize and store the last interval // if necessary. 
if (haveStartedInterval) { haveStartedInterval = false; intervals.add(interval); } } } // Add last interval if there is one if (haveStartedInterval) { intervals.add(interval); } return intervals.toArray(new double[0][0]); }
Example 13
Source File: PMCC.java From meka with GNU General Public License v3.0 | 4 votes |
@Override public void buildClassifier(Instances D) throws Exception { m_R = new Random(m_S); // Variables int L = D.classIndex(); int N = D.numInstances(); int d = D.numAttributes()-L; h = new CC[m_M]; w = new double[m_M]; //int s[][] = new int[m_M][L]; // for interest's sake if (m_Is >= m_M) { //HashMap<String,CC> id2cc = new HashMap<String,CC>(); // Make CC int s[] = MLUtils.gen_indices(L); MLUtils.randomize(s,m_R); h[0] = buildCC(Arrays.copyOf(s,s.length),D); // @todo move into setChain(..) w[0] = payoff(h[0],D); //id2cc.put(Arrays.toString(s),h[0]); // save a copy //s[0] = s_; if(getDebug()) System.out.println("s[0] = "+Arrays.toString(s)); for(int t = 0; t < m_Is; t++) { // propose a chain s' ~ pi(s'|s) int s_[] = (m_O > 0) ? pi(Arrays.copyOf(s,s.length),m_R,t,m_Beta) : // default cond. option - with temperature A.swap(Arrays.copyOf(s,s.length),m_R) ; // special simple option - swap two elements // build h' with sequence s' //CC h_ = rebuildCC(getClosest(id2cc,Arrays.toString(s_)),s_,D); CC h_ = buildCC(Arrays.copyOf(s_,s_.length),D); //id2cc.put(Arrays.toString(s_), h_); // rate h' (by its performance on the training data) double w_ = payoff(h_,D); // accept h' weighted more than the weakest h in the population int min = Utils.sort(w)[0]; // (min index) if (w_ > w[min]) { w[min] = w_; h[min] = h_; if (getDebug()) System.out.println(" accepted h_ with score "+w_+" > "+w[min]); s = s_; } else if (getDebug()) System.out.println(" DENIED h_ with score "+w_+" !> score "+w[min]); } if (getDebug()) System.out.println("---"); // normalise weights Utils.normalize(w); } else { throw new Exception("[Error] Number of chains evaluated (Is) should be at least as great as the population selected (M), and always greater than 0."); } }
Example 14
Source File: A.java From meka with GNU General Public License v3.0 | 4 votes |
public static final int[] sort(int a[]) { int c[] = Arrays.copyOf(a,a.length); Utils.sort(c); // @todo: Arrays.sort ? return c; }
Example 15
Source File: MetricsTest.java From meka with GNU General Public License v3.0 | 4 votes |
public void testUtilSort(){ int[] real = {0,1,0,1,0,1}; double[] pred = {0.4, 0.1, 0.5, 0.2, 0.6, 0.3}; int[] sorted = Utils.sort(pred); assertTrue(Arrays.toString(sorted), Arrays.equals(new int[]{1,3,5,0,2,4},sorted)); // for(int i =0; i < sorted.length; sorted++){ // assertEquals(new int[]{3,0,4,1,5,2},sorted); // } }
Example 16
Source File: C45PruneableClassifierTreeG.java From tsml with GNU General Public License v3.0 | 3 votes |
/**
 * Returns the instance indices of a dataset ordered ascending by the value
 * of one attribute.
 *
 * @param data the data the indices represent
 * @param a the index of the attribute to sort by
 * @return array of sorted indices
 */
private int[] sortByAttribute(Instances data, int a) {
  return Utils.sort(data.attributeToDoubleArray(a));
}