weka.core.Utils#sort

Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Looks up the point of a threshold curve whose threshold value lies
 * nearest to a requested target. Threshold values are read from the last
 * attribute of the curve.
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance whose threshold is closest to the
 * target, or -1 if the relation name does not match, the curve holds no
 * instances, or the target is below 0 or above 1
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {

  // Only curves carrying this class's relation name are accepted.
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }

  final int numPoints = tcurve.numInstances();
  if (numPoints == 0) {
    return -1;
  }
  if ((threshold < 0) || (threshold > 1.0)) {
    return -1;
  }

  // A single point is trivially the closest one.
  if (numPoints == 1) {
    return 0;
  }

  final int thresholdAttIndex = tcurve.numAttributes() - 1;
  final double[] thresholds = tcurve.attributeToDoubleArray(thresholdAttIndex);
  final int[] order = Utils.sort(thresholds);
  return binarySearch(order, thresholds, threshold);
}

Source File: MarginCurve.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Builds the cumulative margin distribution for a set of predictions and
 * returns it as a set of Instances with the following structure:<p> <ul>
 * <li> <b>Margin</b> contains the margin value (which should be plotted
 * as an x-coordinate)
 * <li> <b>Current</b> contains the weight contributed at the current
 * data point (plot as y axis)
 * <li> <b>Cumulative</b> contains the accumulated weight of all data
 * points up to and including the current one (plot as y axis)
 * </ul> <p>
 *
 * The curve starts with a point at margin -1 with both counts at zero,
 * followed by one point per prediction in ascending order of margin.
 * Both counters are ints, so each addition of a (double) prediction
 * weight is narrowed back to int.
 *
 * @param predictions the predictions to build the curve from
 * @return datapoints as a set of instances, null if no predictions
 * have been made.
 */
public Instances getCurve(FastVector predictions) {

  if (predictions.size() == 0) {
    return null;
  }

  Instances curve = makeHeader();
  double[] margins = getMargins(predictions);
  int[] order = Utils.sort(margins);

  int binMargin = 0;
  int totalMargin = 0;

  // Anchor point at the lowest possible margin.
  curve.add(makeInstance(-1, binMargin, totalMargin));

  for (int idx : order) {
    double margin = margins[idx];
    NominalPrediction prediction = (NominalPrediction) predictions.elementAt(idx);
    double weight = prediction.weight();

    totalMargin += weight;
    binMargin += weight;

    // Every prediction yields its own point; the bin restarts afterwards.
    curve.add(makeInstance(margin, binMargin, totalMargin));
    binMargin = 0;
  }
  return curve;
}

Source File: EvaluationStatisticsUtils.java From meka with GNU General Public License v3.0

6 votes

/**
 * Rank Matrix - converts the value matrix of a measurement into ranks.
 * Every row is ranked on its own: the column at the first position of
 * the ordering returned by Utils.sort for that row receives rank 1, the
 * next one rank 2, and so on.
 *
 * @param stats		the statistics the value matrix is built from
 * @param measurement	the measurement to rank on
 * @return		a matrix of the same dimensions as the value matrix, holding 1-based ranks
 */
public static int[][] rankMatrix(List<EvaluationStatistics> stats, String measurement) {

	final double[][] values = valueMatrix(stats, measurement);

	final int numRows = values.length;
	// the column count is taken from the first row
	final int numCols = values[0].length;

	final int[][] ranks = new int[numRows][numCols];
	for (int row = 0; row < numRows; row++) {
		final int[] order = Utils.sort(values[row]);
		// ranks are 1-based: position 0 in the ordering gets rank 1
		int rank = 1;
		for (int pos = 0; pos < numCols; pos++) {
			ranks[row][order[pos]] = rank++;
		}
	}

	return ranks;
}

Source File: LFSMethods.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Ranks the attributes of a dataset by evaluating each one as a
 * single-attribute subset. The merit of every non-class attribute is
 * stored negated and the class attribute is given Double.MAX_VALUE
 * before the merits are handed to Utils.sort; the resulting index array
 * is the ranking. Every evaluation increments m_evalsTotal.
 *
 * @param data the data whose attributes are ranked
 * @param evaluator the subset evaluator used to score each attribute
 * @param verbose whether to print progress and the ranking to stdout
 * @return ranking (integer array) of attributes in data with evaluator (sorting is NOT stable!)
 * @throws Exception if the evaluator fails on a subset
 */
public int[] rankAttributes(Instances data, SubsetEvaluator evaluator,
                            boolean verbose) throws Exception {
  if (verbose) {
    System.out.println("Ranking attributes with " +
                       evaluator.getClass().getName());
  }

  final int numAtts = data.numAttributes();
  double[] merit = new double[numAtts];
  // reused bit set that only ever holds the attribute under evaluation
  BitSet single = new BitSet(numAtts);

  for (int att = 0; att < numAtts; att++) {
    if (att == data.classIndex()) {
      merit[att] = Double.MAX_VALUE;
    } else {
      single.set(att);
      merit[att] -= evaluator.evaluateSubset(single);
      m_evalsTotal++;
      single.clear(att);
    }

    if (verbose) {
      System.out.println(att + ": " + merit[att]);
    }
  }

  int[] ranking = Utils.sort(merit);

  if (verbose) {
    System.out.print("Ranking [ ");
    for (int att : ranking) {
      System.out.print(att + " ");
    }
    System.out.println("]\n");
  }

  return ranking;
}

Source File: NominalToBinary.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Computes average class values for each attribute and value.
 *
 * For every nominal attribute of the input format, the weighted mean class
 * value of each attribute value is determined from the instances that have
 * neither the class nor that attribute missing. Attribute values without
 * any weight receive the overall weighted mean instead. The ordering that
 * Utils.sort returns for these means is stored in m_Indices; non-nominal
 * attributes keep an empty index array.
 */
 private void computeAverageClassValues() {

   int numAtts = getInputFormat().numAttributes();
   m_Indices = new int[numAtts][0];

   for (int attIndex = 0; attIndex < numAtts; attIndex++) {
     Attribute att = getInputFormat().attribute(attIndex);
     if (!att.isNominal()) {
       continue;
     }

     int numValues = att.numValues();
     // per attribute value: weighted sum of class values, and total weight
     double[] classSums = new double[numValues];
     double[] weights = new double[numValues];

     int numInstances = getInputFormat().numInstances();
     for (int i = 0; i < numInstances; i++) {
       Instance inst = getInputFormat().instance(i);
       if (inst.classIsMissing() || inst.isMissing(attIndex)) {
         continue;
       }
       int valueIndex = (int) inst.value(attIndex);
       weights[valueIndex] += inst.weight();
       classSums[valueIndex] += inst.weight() * inst.classValue();
     }

     double overallSum = Utils.sum(classSums);
     double totalWeight = Utils.sum(weights);
     if (Utils.gr(totalWeight, 0)) {
       for (int v = 0; v < numValues; v++) {
         if (Utils.gr(weights[v], 0)) {
           classSums[v] /= weights[v];
         } else {
           // unseen value: fall back to the overall weighted mean
           classSums[v] = overallSum / totalWeight;
         }
       }
     }
     m_Indices[attIndex] = Utils.sort(classSums);
   }
 }

Source File: Metrics.java From meka with GNU General Public License v3.0

5 votes

/**
 * Rank loss computed from real-valued predictions. The labels and
 * predictions are first passed through align(..) (works with missing),
 * the aligned predictions are turned into an index ordering with
 * Utils.sort, and the result is delegated to the (int[], int[]) overload.
 *
 * @param y      the label vector
 * @param rpred  the real-valued predictions
 * @return       the rank loss as computed by the (int[], int[]) overload
 */
public static double L_RankLoss(int y[], double rpred[]) {
    final double[][] aligned = align(y, rpred);

    final int[] labels = toIntArray(aligned[0]);
    final double[] scores = aligned[1];

    return L_RankLoss(labels, Utils.sort(scores));
}

Source File: Ranker.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Produces the evaluated attributes ordered from highest to lowest merit.
 * Each row of the result holds the attribute index in column 0 and its
 * merit in column 1. If no explicit number of attributes to select has
 * been set, the number to select is derived here: all attributes when no
 * threshold is set, otherwise via determineNumToSelectFromThreshold.
 *
 * @return an array of sorted (highest eval to lowest) attribute indexes
 * @throws Exception if no search has been performed yet, or if more
 * attributes are requested than are available.
 */
public double[][] rankedAttributes ()
  throws Exception {

  if (m_attributeList == null || m_attributeMerit == null) {
    throw  new Exception("Search must be performed before a ranked " 
                         + "attribute list can be obtained");
  }

  int[] ascending = Utils.sort(m_attributeMerit);
  int count = ascending.length;
  double[][] bestToWorst = new double[count][2];

  // walk the ascending ordering backwards so the best merit comes first,
  // translating each position into its attribute index and merit
  for (int rank = 0; rank < count; rank++) {
    int pos = ascending[count - 1 - rank];
    bestToWorst[rank][0] = m_attributeList[pos];
    bestToWorst[rank][1] = m_attributeMerit[pos];
  }

  if (m_numToSelect > bestToWorst.length) {
    throw new Exception("More attributes requested than exist in the data");
  }

  if (m_numToSelect <= 0) {
    boolean noThreshold = (m_threshold == -Double.MAX_VALUE);
    if (noThreshold) {
      m_calculatedNumToSelect = bestToWorst.length;
    } else {
      determineNumToSelectFromThreshold(bestToWorst);
    }
  }

  return  bestToWorst;
}

Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Set the format for the transformed data.
 *
 * One attribute is created per retained principal component, walking the
 * sorted eigenvalues from the last entry of m_sortedEigens downwards until
 * the accumulated share of m_sumOfEigenValues reaches m_coverVariance.
 * Each attribute is named after the linear combination of original
 * attributes it represents. If m_maxAttrsInName is positive, only that many
 * terms are listed, largest coefficient magnitude first, followed by "...";
 * otherwise all terms are listed in the original attribute order.
 *
 * @return a set of empty Instances (header only) in the new format, or
 * null if no eigenvalues have been computed yet
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
  if (m_eigenvalues == null) {
    return null;
  }

  double cumulative = 0.0;
  FastVector attributes = new FastVector();
  for (int i = m_numAttribs - 1; i >= 0; i--) {
    StringBuilder attName = new StringBuilder();
    int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;

    // this array contains the indices of the coefficients in the order
    // in which they appear in the attribute name
    int[] coeff_inds;
    if (m_maxAttrsInName > 0) {
      // the name is truncated, so list the coefficients by decreasing
      // magnitude (magnitudes are negated because Utils.sort is ascending)
      double[] coeff_mags = new double[m_numAttribs];
      for (int j = 0; j < m_numAttribs; j++) {
        coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
      }
      coeff_inds = Utils.sort(coeff_mags);
    } else {
      // no limit on the name length: use all coeffs in original order
      coeff_inds = new int[m_numAttribs];
      for (int j = 0; j < m_numAttribs; j++) {
        coeff_inds[j] = j;
      }
    }

    // build final attName string
    for (int j = 0; j < num_attrs; j++) {
      double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
      if (j > 0 && coeff_value >= 0) {
        attName.append("+");
      }
      attName.append(Utils.doubleToString(coeff_value,5,3)
                     +m_trainInstances.attribute(coeff_inds[j]).name());
    }
    if (num_attrs < m_numAttribs) {
      attName.append("...");
    }

    attributes.addElement(new Attribute(attName.toString()));
    cumulative+=m_eigenvalues[m_sortedEigens[i]];

    // stop as soon as the requested proportion of variance is covered
    if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
      break;
    }
  }

  if (m_hasClass) {
    attributes.addElement(m_trainHeader.classAttribute().copy());
  }

  Instances outputFormat = 
    new Instances(m_trainInstances.relationName()+"_principal components",
                  attributes, 0);

  // set the class to be the last attribute if necessary
  if (m_hasClass) {
    outputFormat.setClassIndex(outputFormat.numAttributes()-1);
  }

  m_outputNumAtts = outputFormat.numAttributes();
  return outputFormat;
}

Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Calculates the n point precision result, which is the precision averaged
 * over n evenly spaced (w.r.t recall) samples of the curve. The sample
 * positions are i / (n - 1) for i = 0 .. n-1. For a sample that does not
 * map to the first curve point, precision is linearly interpolated towards
 * the next curve point with a different recall value, if there is one.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @param n the number of points to average over.
 * @return the n-point precision, or NaN if the curve is empty or carries a
 * different relation name.
 */
public static double getNPointPrecision(Instances tcurve, int n) {

  boolean isThresholdCurve = RELATION_NAME.equals(tcurve.relationName());
  if (!isThresholdCurve || (tcurve.numInstances() == 0)) {
    return Double.NaN;
  }

  int recallInd = tcurve.attribute(RECALL_NAME).index();
  int precisInd = tcurve.attribute(PRECISION_NAME).index();
  double[] recallVals = tcurve.attributeToDoubleArray(recallInd);
  int[] order = Utils.sort(recallVals);

  double isize = 1.0 / (n - 1);
  double psum = 0;
  for (int i = 0; i < n; i++) {
    double target = i * isize;
    int pos = binarySearch(order, recallVals, target);
    double recall = recallVals[order[pos]];
    double precis = tcurve.instance(order[pos]).value(precisInd);

    // interpolate figures for non-endpoints: scan forward for the first
    // point whose recall differs and interpolate on that segment
    if (pos != 0) {
      for (int next = pos + 1; next < order.length; next++) {
        double recall2 = recallVals[order[next]];
        if (recall2 == recall) {
          continue;
        }
        double precis2 = tcurve.instance(order[next]).value(precisInd);
        double slope = (precis2 - precis) / (recall2 - recall);
        double offset = precis - recall * slope;
        precis = target * slope + offset;
        break;
      }
    }
    psum += precis;
  }
  return psum / n;
}

Source File: InterquartileRange.java From tsml with GNU General Public License v3.0

4 votes

/**
  * computes the thresholds for outliers and extreme values
  * <p>
  * For every numeric attribute the median (Q2) of the sorted values is
  * determined, Q1 and Q3 are taken as the medians of the lower and upper
  * floor(n/2) sorted values, and the thresholds are placed at the outlier
  * and extreme-value factors times the IQR below Q1 and above Q3.
  * Attributes flagged as NON_NUMERIC are skipped. With a single value all
  * three quartiles equal that value; with no values at all the statistics
  * of that attribute are set to NaN.
  * 
  * @param instances	the data to work on
  */
 protected void computeThresholds(Instances instances) {
   int numAtts = m_AttributeIndices.length;

   m_UpperExtremeValue = new double[numAtts];
   m_UpperOutlier      = new double[numAtts];
   m_LowerOutlier      = new double[numAtts];
   m_LowerExtremeValue = new double[numAtts];
   m_Median            = new double[numAtts];
   m_IQR               = new double[numAtts];

   for (int i = 0; i < numAtts; i++) {
     // non-numeric attribute?
     if (m_AttributeIndices[i] == NON_NUMERIC) {
       continue;
     }

     // sort attribute data
     // NOTE(review): the values are used exactly as returned; missing
     // values are not filtered here - confirm how callers deal with them
     double[] values        = instances.attributeToDoubleArray(m_AttributeIndices[i]);
     int[]    sortedIndices = Utils.sort(values);
     int      n             = sortedIndices.length;
     int      half          = n / 2;

     double q1;
     double q2;
     double q3;
     if (n == 0) {
       // nothing to compute from
       q1 = q2 = q3 = Double.NaN;
     }
     else {
       q2 = medianOfSortedRange(values, sortedIndices, 0, n);
       if (half == 0) {
         // single value: both halves are empty
         q1 = q2;
         q3 = q2;
       }
       else {
         // lower half = first 'half' values, upper half = last 'half' values
         // (for odd n the median itself belongs to neither half)
         q1 = medianOfSortedRange(values, sortedIndices, 0, half);
         q3 = medianOfSortedRange(values, sortedIndices, n - half, half);
       }
     }

     // determine thresholds and other values
     m_Median[i]            = q2;
     m_IQR[i]               = q3 - q1;
     m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
     m_UpperOutlier[i]      = q3 + getOutlierFactor()       * m_IQR[i];
     m_LowerOutlier[i]      = q1 - getOutlierFactor()       * m_IQR[i];
     m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
   }
 }

 /**
  * Returns the median of a contiguous run of the sorted values: the middle
  * element for an odd length, the mean of the two middle elements for an
  * even length.
  * 
  * @param values		the unsorted values
  * @param sortedIndices	the indices of the values in sorted order
  * @param start		the first sorted position of the run
  * @param length		the number of sorted positions in the run, at least 1
  * @return			the median of the run
  */
 private static double medianOfSortedRange(double[] values, int[] sortedIndices, int start, int length) {
   int mid = start + length / 2;
   if (length % 2 == 1) {
     return values[sortedIndices[mid]];
   }
   return (values[sortedIndices[mid - 1]] + values[sortedIndices[mid]]) / 2;
 }

Source File: UnivariateEqualFrequencyHistogramEstimator.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Returns the interval for the given confidence value.
 *
 * The range between the smallest and largest key of m_TM, widened on both
 * sides, is cut into m_NumIntervals equally wide bins. Each bin's
 * probability is estimated with the trapezoid rule on the density, bins
 * are picked in order of decreasing probability until their total reaches
 * the confidence value, and adjacent picked bins are merged.
 * 
 * @param conf the confidence value in the interval [0, 1]
 * @return the intervals, one {lower, upper} pair per row
 */
public double[][] predictIntervals(double conf) {

  // Bring the estimator up to date before querying the density
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - conf) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Estimate the probability of each bin (trapezoid between its borders)
  double[] binProbs = new double[m_NumIntervals];
  double densityLeft = Math.exp(logDensity(min));
  for (int bin = 0; bin < m_NumIntervals; bin++) {
    double densityRight = Math.exp(logDensity(min + (bin + 1) * delta));
    binProbs[bin] = 0.5 * (densityLeft + densityRight) * delta;
    densityLeft = densityRight;
  }

  // Order the bins by their estimated probability
  int[] order = Utils.sort(binProbs);

  // Pick bins from the most probable downwards until conf is covered
  int numBins = binProbs.length;
  boolean[] selected = new boolean[numBins];
  double covered = 0;
  for (int rank = numBins - 1; (covered < conf) && (rank >= 0); rank--) {
    int bin = order[rank];
    selected[bin] = true;
    covered += binProbs[bin];
  }

  // Merge runs of adjacent selected bins into intervals
  ArrayList<double[]> intervals = new ArrayList<double[]>();
  double[] open = null; // the interval currently being extended, if any
  for (int bin = 0; bin < m_NumIntervals; bin++) {
    if (selected[bin]) {
      if (open == null) {
        open = new double[2];
        open[0] = min + bin * delta;
      }
      // Regardless, the upper boundary moves to the end of this bin
      open[1] = min + (bin + 1) * delta;
    } else if (open != null) {
      // A gap closes the current interval
      intervals.add(open);
      open = null;
    }
  }

  // Add last interval if there is one
  if (open != null) {
    intervals.add(open);
  }

  return intervals.toArray(new double[0][0]);
}

Source File: UnivariateKernelEstimator.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Returns the interval for the given confidence value.
 *
 * The key range of m_TM, extended at both ends, is divided into
 * m_NumIntervals bins of equal width. The probability of each bin is
 * approximated from the density at its two borders; bins are then chosen
 * from the most probable one downwards until the chosen probability adds
 * up to the confidence value, and neighbouring chosen bins are joined.
 * 
 * @param conf the confidence value in the interval [0, 1]
 * @return the intervals, each row being a {lower, upper} pair
 */
public double[][] predictIntervals(double conf) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  double val = Statistics.normalInverse(1.0 - (1.0 - conf) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Approximate the area under the density for every bin
  double[] areas = new double[m_NumIntervals];
  double lowerDensity = Math.exp(logDensity(min));
  int index = 0;
  while (index < m_NumIntervals) {
    double upperDensity = Math.exp(logDensity(min + (index + 1) * delta));
    areas[index] = 0.5 * (lowerDensity + upperDensity) * delta;
    lowerDensity = upperDensity;
    index++;
  }

  // Sort bins based on their area
  int[] byArea = Utils.sort(areas);

  // Mark the bins to use, largest area first
  boolean[] chosen = new boolean[areas.length];
  double total = 0;
  int taken = 0;
  while ((total < conf) && (taken < chosen.length)) {
    int bin = byArea[chosen.length - (taken + 1)];
    chosen[bin] = true;
    total += areas[bin];
    taken++;
  }

  // Create final list of intervals by joining neighbouring chosen bins
  ArrayList<double[]> result = new ArrayList<double[]>();
  double[] current = null; // non-null while an interval is being built
  for (int bin = 0; bin < m_NumIntervals; bin++) {
    if (!chosen[bin]) {
      // An unused bin finishes the interval under construction, if any
      if (current != null) {
        result.add(current);
        current = null;
      }
      continue;
    }

    if (current == null) {
      current = new double[2];
      current[0] = min + bin * delta;
    }
    // Regardless, we should update the upper boundary
    current[1] = min + (bin + 1) * delta;
  }

  // Add last interval if there is one
  if (current != null) {
    result.add(current);
  }

  return result.toArray(new double[0][0]);
}

Source File: PMCC.java From meka with GNU General Public License v3.0

4 votes

/**
 * Builds a population of m_M classifier chains by evaluating m_Is proposed
 * chain orders. The first chain uses a random order; each iteration then
 * proposes a new order (via pi(..) if m_O > 0, otherwise by swapping two
 * elements), builds a chain for it and scores it with payoff(..) on the
 * training data. A proposal replaces the lowest-weighted member of the
 * population whenever its score is higher. Finally the weights are
 * normalised.
 *
 * @param D	the training data
 * @throws Exception	if M is not positive or Is is smaller than M, or if
 * 			building or scoring a chain fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {

	m_R = new Random(m_S);

	// enforce the documented constraint before any work is done
	if (m_M <= 0 || m_Is < m_M) {
		throw new Exception("[Error] Number of chains evaluated (Is) should be at least as great as the population selected (M), and always greater than 0.");
	}

	// Variables

	int L = D.classIndex(); 

	// population and its weights; slots that have not been filled yet
	// hold null / 0.0 and therefore count as the weakest members
	h = new CC[m_M];
	w = new double[m_M];

	// Make CC
	int s[] = MLUtils.gen_indices(L); 
	MLUtils.randomize(s,m_R);
	h[0] = buildCC(Arrays.copyOf(s,s.length),D); // @todo move into setChain(..)
	w[0] = payoff(h[0],D);
	if(getDebug()) System.out.println("s[0] = "+Arrays.toString(s));

	for(int t = 0; t < m_Is; t++) {

		// propose a chain s' ~ pi(s'|s) 
		int s_[] = (m_O > 0) ? 
			  pi(Arrays.copyOf(s,s.length),m_R,t,m_Beta)	  :	// default cond. option - with temperature
			  A.swap(Arrays.copyOf(s,s.length),m_R) ;	        // special simple option - swap two elements

		// build h' with sequence s'
		CC h_ = buildCC(Arrays.copyOf(s_,s_.length),D);

		// rate h' (by its performance on the training data)
		double w_ = payoff(h_,D);

		// accept h' weighted more than the weakest h in the population
		int min = Utils.sort(w)[0]; // (min index)
		// remember the score being beaten, so the log shows it rather than the new one
		double weakest = w[min];
		if (w_ > weakest) {
			w[min] = w_;
			h[min] = h_;
			if (getDebug()) System.out.println(" accepted h_ with score "+w_+" > "+weakest);
			s = s_;
		}
		else
			if (getDebug()) System.out.println(" DENIED h_ with score "+w_+" !> score "+weakest);
	}
	if (getDebug()) System.out.println("---");

	// normalise weights
	Utils.normalize(w);

}

Source File: A.java From meka with GNU General Public License v3.0

4 votes

/**
 * Returns a copy of the given array with its elements sorted in ascending
 * order. The array passed in is left untouched.
 *
 * @param a	the array to sort
 * @return	a new, sorted array with the same elements as a
 */
public static final int[] sort(int a[]) {
	int c[] = Arrays.copyOf(a,a.length);
	// Utils.sort(int[]) only returns an index permutation and leaves its
	// argument as it was, so the copy has to be sorted with Arrays.sort.
	Arrays.sort(c);
	return c;
}

Source File: MetricsTest.java From meka with GNU General Public License v3.0

4 votes

/**
 * Checks that Utils.sort returns the indices of the input values in
 * ascending order of value.
 */
public void testUtilSort(){
    double[] pred = {0.4, 0.1, 0.5, 0.2, 0.6, 0.3};
    // indices of pred from the smallest (0.1) to the largest (0.6) value
    int[] expected = {1, 3, 5, 0, 2, 4};

    int[] sorted = Utils.sort(pred);

    assertTrue(Arrays.toString(sorted), Arrays.equals(expected, sorted));
}

Source File: C45PruneableClassifierTreeG.java From tsml with GNU General Public License v3.0

3 votes

/**
 * Orders the instances of a dataset by the values of one attribute.
 *
 * @param data the data the indices represent
 * @param a the index of the attribute to sort by
 * @return the array of instance indices that Utils.sort returns for the
 * values of attribute a
 */
private int [] sortByAttribute(Instances data, int a) {
  return Utils.sort(data.attributeToDoubleArray(a));
}

Java Code Examples for weka.core.Utils#sort()