weka.core.Utils#gr

Source File: MINND.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Computes the target function that gradient descent minimizes for one
 * exemplar.
 * The formula is:<br/>
 * 1/2*sum[i=1..p](f(X, Xi)-var(Y, Yi))^2 <p/>
 * where p is the number of exemplars and Y is the class label.
 * f() is the weighted Euclidean distance between row <code>rowpos</code>
 * and row i of X, using x as the per-dimension weights and skipping every
 * dimension whose entry in m_Variance[rowpos] is not greater than zero.
 * var() is 0 when both exemplars carry the same label and
 * sqrt(m_Dimension - 1) otherwise.
 * Row <code>rowpos</code> itself and rows of X that are null do not
 * contribute to the sum.
 *
 * @param x the weights of the exemplar in question
 * @param X the exemplars, one row per exemplar (rows may be null)
 * @param rowpos row index of x in X
 * @param Y the observed class labels, indexed like the rows of X
 * @return the result of the target function
 * @throws IllegalStateException if a distance evaluates to infinity
 */
public double target(double[] x, double[][] X, int rowpos, double[] Y){
  double y = Y[rowpos], result=0;

  for(int i=0; i < X.length; i++){
    if((i != rowpos) && (X[i] != null)){
      double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1);
      double f=0;
      for(int j=0; j < m_Dimension; j++)
        if(Utils.gr(m_Variance[rowpos][j], 0.0)){
          f += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]);
        }
      f = Math.sqrt(f);
      // An infinite distance means the weights or means have blown up.
      // Report it to the caller instead of terminating the whole JVM.
      if(Double.isInfinite(f))
        throw new IllegalStateException(
            "Infinite distance between exemplars " + rowpos + " and " + i);
      result += 0.5 * (f - var) * (f - var);
    }
  }
  return result;
}

Source File: LMT.java From tsml with GNU General Public License v3.0

6 votes

/**
  * Predicts the class of an instance by choosing the class with the largest
  * estimated probability. Ties (within the tolerance of Utils.gr) are
  * resolved in favour of the lowest class index.
  *
  * @param instance the instance to classify
  * @return the index of the predicted class
  * @throws Exception if instance can't be classified successfully
  */
 public double classifyInstance(Instance instance) throws Exception {

   final double[] distribution = distributionForInstance(instance);
   final int classCount = instance.numClasses();

   int bestClass = 0;
   double bestProb = -1;

   // keep the first class whose probability clearly beats the best so far
   for (int c = 0; c < classCount; c++) {
     if (!Utils.gr(distribution[c], bestProb)) {
       continue;
     }
     bestClass = c;
     bestProb = distribution[c];
   }
   return (double) bestClass;
 }

Source File: FT.java From tsml with GNU General Public License v3.0

6 votes

/**
  * Returns the index of the most probable class for the given instance,
  * based on the class distribution computed by distributionForInstance.
  *
  * @param instance the instance to classify
  * @return the index of the class with maximum probability
  * @throws Exception if instance can't be classified successfully
  */
 public double classifyInstance(Instance instance) throws Exception {

   double[] classProbs = distributionForInstance(instance);

   int winner = 0;
   double winnerProb = -1;
   int candidate = 0;

   // scan all classes; a later class only wins if it is strictly
   // greater according to Utils.gr
   while (candidate < instance.numClasses()) {
     double candidateProb = classProbs[candidate];
     if (Utils.gr(candidateProb, winnerProb)) {
       winner = candidate;
       winnerProb = candidateProb;
     }
     candidate++;
   }
   return (double) winner;
 }

Source File: EntropySplitCrit.java From tsml with GNU General Public License v3.0

6 votes

/**
  * Computes the entropy of the test distribution with respect to the
  * training distribution. Counts from the training distribution are
  * smoothed by adding one per class (and the number of relevant classes
  * per bag) before the logarithm is taken.
  *
  * @param train the training distribution
  * @param test the test distribution
  * @return the entropy, expressed in bits
  */
 public final double splitCritValue(Distribution train, Distribution test) {

   // A class is relevant if it occurs in the training or the test data
   int relevantClasses = 0;
   for (int c = 0; c < test.numClasses(); c++) {
     boolean seen = Utils.gr(train.perClass(c), 0)
       || Utils.gr(test.perClass(c), 0);
     if (seen) {
       relevantClasses++;
     }
   }

   // Accumulate the entropy bag by bag, skipping empty test bags
   double entropy = 0;
   for (int b = 0; b < test.numBags(); b++) {
     if (!Utils.gr(test.perBag(b), 0)) {
       continue;
     }
     for (int c = 0; c < test.numClasses(); c++) {
       if (Utils.gr(test.perClassPerBag(b, c), 0)) {
         entropy -= test.perClassPerBag(b, c)
           * Math.log(train.perClassPerBag(b, c) + 1);
       }
     }
     entropy += test.perBag(b) * Math.log(train.perBag(b) + relevantClasses);
   }

   return entropy / log2;
 }

Source File: ClassifierDecList.java From tsml with GNU General Public License v3.0

6 votes

/**
  * Classifies an instance by taking the class with the highest
  * probability according to getProbs.
  *
  * @param instance the instance to classify
  * @return the index of the most probable class, or -1.0 if the highest
  * probability found equals zero
  * @exception Exception if something goes wrong
  */
 public double classifyInstance(Instance instance)
      throws Exception {

   int bestClass = 0;
   double bestProb = -1;

   for (int c = 0; c < instance.numClasses(); c++) {
     double prob = getProbs(c, instance, 1);
     if (Utils.gr(prob, bestProb)) {
       bestClass = c;
       bestProb = prob;
     }
   }

   // no class received any probability mass
   return Utils.eq(bestProb, 0) ? -1.0 : (double) bestClass;
 }

Source File: NominalToBinary.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Computes, for every nominal attribute, the weighted average class value
  * of each attribute value and stores the sort order of those averages in
  * m_Indices. Values that never occur with a known class receive the
  * overall average of the attribute.
  */
 private void computeAverageClassValues() {

   double[][] classAverages = new double[getInputFormat().numAttributes()][0];
   m_Indices = new int[getInputFormat().numAttributes()][0];

   for (int a = 0; a < getInputFormat().numAttributes(); a++) {
     Attribute attribute = getInputFormat().attribute(a);
     if (!attribute.isNominal()) {
       continue;
     }

     int numValues = attribute.numValues();
     classAverages[a] = new double[numValues];
     double[] valueWeights = new double[numValues];

     // accumulate weight and weighted class value per attribute value,
     // ignoring instances with a missing class or missing attribute value
     for (int n = 0; n < getInputFormat().numInstances(); n++) {
       Instance current = getInputFormat().instance(n);
       if (current.classIsMissing() || current.isMissing(a)) {
         continue;
       }
       valueWeights[(int) current.value(a)] += current.weight();
       classAverages[a][(int) current.value(a)] +=
         current.weight() * current.classValue();
     }

     double weightedClassSum = Utils.sum(classAverages[a]);
     double totalWeight = Utils.sum(valueWeights);

     // turn the sums into averages; unseen values get the global average
     if (Utils.gr(totalWeight, 0)) {
       for (int v = 0; v < numValues; v++) {
         if (Utils.gr(valueWeights[v], 0)) {
           classAverages[a][v] /= valueWeights[v];
         } else {
           classAverages[a][v] = weightedClassSum / totalWeight;
         }
       }
     }
     m_Indices[a] = Utils.sort(classAverages[a]);
   }
 }

Source File: Distribution.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Returns the index of the class with the highest frequency over all bags.
  * If no class has a count greater than zero, index 0 is returned.
  *
  * @return the index of the most frequent class
  */
 public final int maxClass() {

   int bestClass = 0;
   double bestCount = 0;

   for (int c = 0; c < m_perClass.length; c++) {
     double classCount = m_perClass[c];
     if (Utils.gr(classCount, bestCount)) {
       bestCount = classCount;
       bestClass = c;
     }
   }

   return bestClass;
 }

Source File: SMO.java From tsml with GNU General Public License v3.0

5 votes

/**
    * Quick and dirty check whether the quadratic programming problem is
    * solved. Prints the sum of y(i) * alpha(i) and a line for every
    * training instance that violates one of the three KKT conditions to
    * standard error.
    *
    * @throws Exception if checking fails
    */
   protected void checkClassifier() throws Exception {

     double weightedLabelSum = 0;
     for (int k = 0; k < m_alpha.length; k++) {
       if (m_alpha[k] > 0) {
         weightedLabelSum += m_class[k] * m_alpha[k];
       }
     }
     System.err.println("Sum of y(i) * alpha(i): " + weightedLabelSum);

     for (int k = 0; k < m_alpha.length; k++) {
       double output = SVMOutput(k, m_data.instance(k));
       double margin = m_class[k] * output;
       double upperBound = m_C * m_data.instance(k).weight();

       // alpha == 0 requires a margin of at least 1
       if (Utils.eq(m_alpha[k], 0) && Utils.sm(margin, 1)) {
         System.err.println("KKT condition 1 violated: " + margin);
       }

       // 0 < alpha < C requires a margin of exactly 1
       boolean insideBox = Utils.gr(m_alpha[k], 0)
         && Utils.sm(m_alpha[k], upperBound);
       if (insideBox && !Utils.eq(margin, 1)) {
         System.err.println("KKT condition 2 violated: " + margin);
       }

       // alpha == C requires a margin of at most 1
       if (Utils.eq(m_alpha[k], upperBound) && Utils.gr(margin, 1)) {
         System.err.println("KKT condition 3 violated: " + margin);
       }
     }
   }

Source File: MISMO.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Quick and dirty check whether the quadratic programming problem is solved.
 * Writes the sum of y(i) * alpha(i) to standard error, followed by one line
 * per violated KKT condition.
 *
 * @throws Exception if something goes wrong
 */
protected void checkClassifier() throws Exception {

  double labelAlphaSum = 0;
  for (int idx = 0; idx < m_alpha.length; idx++) {
    if (m_alpha[idx] > 0) {
      labelAlphaSum += m_class[idx] * m_alpha[idx];
    }
  }
  System.err.println("Sum of y(i) * alpha(i): " + labelAlphaSum);

  for (int idx = 0; idx < m_alpha.length; idx++) {
    double output = SVMOutput(idx, m_data.instance(idx));
    double functionalMargin = m_class[idx] * output;
    double boxLimit = m_C * m_data.instance(idx).weight();
    double alpha = m_alpha[idx];

    // condition 1: alpha at the lower bound, margin must not be below 1
    if (Utils.eq(alpha, 0)) {
      if (Utils.sm(functionalMargin, 1)) {
        System.err.println("KKT condition 1 violated: " + functionalMargin);
      }
    }

    // condition 2: alpha strictly inside the box, margin must equal 1
    if (Utils.gr(alpha, 0) && Utils.sm(alpha, boxLimit)) {
      if (!Utils.eq(functionalMargin, 1)) {
        System.err.println("KKT condition 2 violated: " + functionalMargin);
      }
    }

    // condition 3: alpha at the upper bound, margin must not exceed 1
    if (Utils.eq(alpha, boxLimit)) {
      if (Utils.gr(functionalMargin, 1)) {
        System.err.println("KKT condition 3 violated: " + functionalMargin);
      }
    }
  }
}

Source File: MakeDecList.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Returns the class distribution for an instance. The rules in theRules
  * are visited in order; each rule that covers the instance contributes
  * its distribution scaled by the weight still unassigned, and the
  * unassigned weight shrinks by the rule's coverage. The walk ends once
  * no weight is left.
  *
  * @param instance the instance to get the distribution for
  * @return the (unnormalised) sum of the weighted rule distributions
  * @exception Exception if distribution can't be computed
  */
 public double[] distributionForInstance(Instance instance) 
      throws Exception {

   double[] mixture = new double[instance.numClasses()];
   double remaining = 1;

   for (int r = 0; Utils.gr(remaining, 0); r++) {
     ClassifierDecList rule = (ClassifierDecList) theRules.elementAt(r);
     double covered = rule.weight(instance);
     if (!Utils.gr(covered, 0)) {
       continue;   // rule does not apply to this instance
     }
     double[] ruleProbs = rule.distributionForInstance(instance);
     for (int c = 0; c < mixture.length; c++) {
       mixture[c] += remaining * ruleProbs[c];
     }
     remaining = remaining * (1 - covered);
   }

   return mixture;
 }

Source File: ReplaceMissingValues.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Signify that this batch of input to the filter is finished.
  * On the first batch (while m_ModesAndMeans is still null) this computes,
  * for every attribute, the replacement value: the index of the most
  * heavily weighted value for nominal attributes and the weighted mean
  * for numeric attributes. All buffered input instances are then
  * converted. On later batches only the input buffer is flushed.
  *
  * @return true if there are instances pending output
  * @throws IllegalStateException if no input structure has been defined
  */
 public boolean batchFinished() {

   if (getInputFormat() == null) {
     throw new IllegalStateException("No input instance format defined");
   }

   if (m_ModesAndMeans == null) {
     // Compute modes and means
     double sumOfWeights =  getInputFormat().sumOfWeights();
     // Per nominal attribute: weight observed for each value. Slot 0 starts
     // with the total weight and is reduced below for every entry an
     // instance actually stores, so whatever remains is the weight of the
     // instances that store nothing for that attribute.
     // NOTE(review): presumably those are sparse instances whose value is
     // the implicit 0 - confirm against the Instance sparse API.
     double[][] counts = new double[getInputFormat().numAttributes()][];
     for (int i = 0; i < getInputFormat().numAttributes(); i++) {
if (getInputFormat().attribute(i).isNominal()) {
  counts[i] = new double[getInputFormat().attribute(i).numValues()];
         if (counts[i].length > 0)
           counts[i][0] = sumOfWeights;
}
     }
     // Per attribute: weight of instances with a non-missing value. Starts
     // at the total weight; missing numeric entries are subtracted below.
     double[] sums = new double[getInputFormat().numAttributes()];
     for (int i = 0; i < sums.length; i++) {
sums[i] = sumOfWeights;
     }
     // Per numeric attribute: weighted sum of the stored values.
     double[] results = new double[getInputFormat().numAttributes()];
     for (int j = 0; j < getInputFormat().numInstances(); j++) {
Instance inst = getInputFormat().instance(j);
// i runs over the entries the instance stores, not over attribute
// indices; inst.index(i) maps an entry back to its attribute.
for (int i = 0; i < inst.numValues(); i++) {
  if (!inst.isMissingSparse(i)) {
    double value = inst.valueSparse(i);
    if (inst.attributeSparse(i).isNominal()) {
             if (counts[inst.index(i)].length > 0) {
               // move this instance's weight from slot 0 to its real value
               counts[inst.index(i)][(int)value] += inst.weight();
               counts[inst.index(i)][0] -= inst.weight();
             }
    } else if (inst.attributeSparse(i).isNumeric()) {
      results[inst.index(i)] += inst.weight() * inst.valueSparse(i);
    }
  } else {
    // missing entry: remove its weight from the totals
    if (inst.attributeSparse(i).isNominal()) {
             if (counts[inst.index(i)].length > 0) {
        counts[inst.index(i)][0] -= inst.weight();
             }
    } else if (inst.attributeSparse(i).isNumeric()) {
      sums[inst.index(i)] -= inst.weight();
    }
  }
}
     }
     m_ModesAndMeans = new double[getInputFormat().numAttributes()];
     for (int i = 0; i < getInputFormat().numAttributes(); i++) {
if (getInputFormat().attribute(i).isNominal()) {
         // a nominal attribute without any declared value has no mode
         if (counts[i].length == 0)
           m_ModesAndMeans[i] = Utils.missingValue();
         else
    m_ModesAndMeans[i] = (double)Utils.maxIndex(counts[i]);
} else if (getInputFormat().attribute(i).isNumeric()) {
  // the mean stays at the array default 0 when no weight is left
  if (Utils.gr(sums[i], 0)) {
    m_ModesAndMeans[i] = results[i] / sums[i];
  }
}
     }

     // Convert pending input instances
     for(int i = 0; i < getInputFormat().numInstances(); i++) {
convertInstance(getInputFormat().instance(i));
     }
   } 
   // Free memory
   flushInput();

   m_NewBatch = true;
   return (numPendingOutput() != 0);
 }

Source File: YATSI.java From collective-classification-weka-package with GNU General Public License v3.0

4 votes

/**
 * internal function for determining the class distribution for an instance, 
 * will be overridden by derived classes. <br/>
 * The distribution is the normalised sum of the weights of the labelled
 * nearest neighbours of the instance, per class. As a side effect the
 * counters m_ClearCutDistribution and m_WeightFlips are updated.
 * 
 * @param instance	the instance to get the distribution for
 * @return		the distribution for the given instance
 * @throws Exception	if the instance is not part of the data
 */
@Override
protected double[] getDistribution(Instance instance) throws Exception {
  // the instance must be one of the stored ones
  final int position = m_Data.indexOf(instance);
  if (position <= -1) {
    throw new Exception("Cannot find instance: " + instance + "\n" 
        + " -> pos=" + position 
        + " = " + m_Data.get(StrictMath.abs(position)));
  }

  Instances neighbors = m_NNSearch.kNearestNeighbours(
                          m_Data.get(position), m_KNNDetermined);

  // per class: summed neighbour weight and plain neighbour count
  double[] weightPerClass = new double[neighbors.numClasses()];
  double[] votesPerClass  = new double[neighbors.numClasses()];
  for (int n = 0; n < neighbors.numInstances(); n++) {
    Instance neighbor = neighbors.instance(n);
    if (neighbor.classIsMissing())
      continue;
    weightPerClass[(int) neighbor.classValue()] += neighbor.weight();
    votesPerClass[(int) neighbor.classValue()]++;
  }

  // the distribution is the normalised copy of the weights
  double[] distribution = new double[instance.numClasses()];
  for (int c = 0; c < distribution.length; c++)
    distribution[c] = weightPerClass[c];

  if (Utils.gr(Utils.sum(distribution), 0)) {
    Utils.normalize(distribution);
  }
  else {
    System.out.println(
        "No summed up weights: " + instance 
        + ", counts=" + Utils.arrayToString(votesPerClass));
  }

  int heaviestClass = Utils.maxIndex(weightPerClass);

  // counted whenever classes other than the heaviest one carry weight
  double otherWeight = Utils.sum(weightPerClass) - weightPerClass[heaviestClass];
  if (!Utils.eq(otherWeight, 0))
    m_ClearCutDistribution++;

  // counted whenever weighting changes the winner of the plain vote
  if (Utils.maxIndex(votesPerClass) != heaviestClass)
    m_WeightFlips++;

  return distribution;
}

Source File: PairedStats.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Calculates the derived statistics: the correlation between x and y,
  * the probability of the observed differences and the resulting
  * significance flag (1 if x is significantly larger, -1 if it is
  * significantly smaller or equal in mean, 0 if not significant).
  */
 public void calculateDerived() {

   xStats.calculateDerived();
   yStats.calculateDerived();
   differencesStats.calculateDerived();

   // correlation is only defined when both deviations are known and x varies
   correlation = Double.NaN;
   boolean deviationsKnown = !Double.isNaN(xStats.stdDev)
     && !Double.isNaN(yStats.stdDev);
   if (deviationsKnown && !Utils.eq(xStats.stdDev, 0)) {
     if (Utils.eq(yStats.stdDev, 0)) {
       correlation = 1.0;
     } else {
       double slope = (xySum - xStats.sum * yStats.sum / count)
         / (xStats.sumSq - xStats.sum * xStats.mean);
       correlation = slope * xStats.stdDev / yStats.stdDev;
     }
   }

   // probability of the differences
   if (Utils.gr(differencesStats.stdDev, 0)) {
     double tval = differencesStats.mean
       * Math.sqrt(count)
       / differencesStats.stdDev;

     if (m_degreesOfFreedom >= 1) {
       // explicitly configured degrees of freedom take precedence
       differencesProbability = Statistics.FProbability(tval * tval, 1,
                                                        m_degreesOfFreedom);
     } else if (count > 1) {
       differencesProbability = Statistics.FProbability(tval * tval, 1,
                                                        (int) count - 1);
     } else {
       differencesProbability = 1;
     }
   } else {
     // no spread: either all differences are zero or they are all equal
     differencesProbability = (differencesStats.sumSq == 0) ? 1.0 : 0.0;
   }

   // significance flag
   differencesSignificance = 0;
   if (differencesProbability <= sigLevel) {
     differencesSignificance = (xStats.mean > yStats.mean) ? 1 : -1;
   }
 }

Source File: ConjunctiveRule.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Builds a single rule learner with REP dealing with nominal classes or
  * numeric classes.
  * For nominal classes, this rule learner predicts a distribution on
  * the classes.
  * For numeric classes, this learner predicts a single value.
  * If the number of antecedents is fixed (m_NumAntds != -1) the rule is
  * only grown on all data; otherwise the data is split into a growing
  * and a pruning part and the rule is grown and then pruned.
  *
  * @param instances the training data
  * @throws Exception if classifier can't be built successfully
  */
 public void buildClassifier(Instances instances) throws Exception {
   // can classifier handle the data?
   getCapabilities().testWithFail(instances);

   // work on a copy without instances whose class is missing
   Instances data = new Instances(instances);
   data.deleteWithMissingClass();

   if (data.numInstances() < m_Folds) {
     throw new Exception("Not enough data for REP.");
   }

   m_ClassAttribute = data.classAttribute();
   m_NumClasses = m_ClassAttribute.isNominal()
     ? m_ClassAttribute.numValues()
     : 1;

   m_Antds = new FastVector();
   m_DefDstr = new double[m_NumClasses];
   m_Cnsqt = new double[m_NumClasses];
   m_Targets = new FastVector();
   m_Random = new Random(m_Seed);

   if (m_NumAntds == -1) {
     data.randomize(m_Random);

     // the last fold becomes the pruning set, the rest the growing set
     data.stratify(m_Folds);
     Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
     Instances pruneData = data.testCV(m_Folds, m_Folds - 1);

     grow(growData);
     prune(pruneData);
   } else {
     grow(data);
   }

   // nominal class: turn the collected counts into distributions
   if (m_ClassAttribute.isNominal()) {
     Utils.normalize(m_Cnsqt);
     if (Utils.gr(Utils.sum(m_DefDstr), 0)) {
       Utils.normalize(m_DefDstr);
     }
   }
 }

Source File: MINND.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Calculates the Kullback Leibler distance between two normal
 * distributions whose dimensions are assumed to be independent:
 * Kullback Leibler distance = integral{f(X)ln(f(X)/g(X))}.
 * The natural logarithm is used instead of log2 purely for computational
 * convenience.
 *
 * With P dimensions, f(X) the first and g(X) the second distribution,
 * the result is
 * Kullback = sum[1..P](ln(SIGMA2/SIGMA1)) +
 *            sum[1..P](SIGMA1^2 / (2*(SIGMA2^2))) +
 *            sum[1..P](w * (MU1-MU2)^2 / (2*(SIGMA2^2))) -
 *            P/2
 * where w is the entry of m_Change[pos] for the dimension. Dimensions in
 * which either variance is not greater than zero are left out.
 *
 * @param mu1 mu of the first normal distribution
 * @param mu2 mu of the second normal distribution 
 * @param var1 variance(SIGMA^2) of the first normal distribution
 * @param var2 variance(SIGMA^2) of the second normal distribution
 * @param pos row of m_Change holding the per-dimension weights to apply
 * @return the Kullback distance of two distributions
 */
public double kullback(double[] mu1, double[] mu2,
    double[] var1, double[] var2, int pos){
  final int dims = mu1.length;
  double distance = 0;

  for (int d = 0; d < dims; d++) {
    // both variances must be positive for the terms to be defined
    if (!Utils.gr(var1[d], 0) || !Utils.gr(var2[d], 0)) {
      continue;
    }
    double logTerm = Math.log(Math.sqrt(var2[d] / var1[d]));
    double varianceTerm = var1[d] / (2.0 * var2[d]);
    double meanGap = mu1[d] - mu2[d];
    double meanTerm = m_Change[pos][d] * meanGap * meanGap / (2.0 * var2[d]);
    distance += (logTerm + varianceTerm + meanTerm - 0.5);
  }

  return distance;
}

Source File: MINND.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Pre-process the given exemplar according to the other exemplars 
 * in the given exemplars.  It also updates noise data statistics.
 * Exemplars whose class value is 0 are returned unchanged. For any other
 * exemplar, each instance of its bag (relational attribute 1) is assigned
 * the majority class of the m_Select nearest other exemplars; instances
 * whose vote disagrees with the exemplar's own class are moved into a
 * noise bag, the rest are kept.
 *
 * @param data the whole exemplars
 * @param pos the position of given exemplar in data
 * @return the processed exemplar (containing only the kept instances)
 * @throws Exception if the returned exemplar is wrong 
 */
public Instance preprocess(Instances data, int pos)
  throws Exception{
  Instance before = data.instance(pos);
  // class 0 exemplars are not filtered and carry no noise statistics
  if((int)before.classValue() == 0){
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
    return before;
  }

  // two empty bags with the structure of the exemplar's relational attribute
  Instances after_relationInsts =before.attribute(1).relation().stringFreeStructure();
  Instances noises_relationInsts =before.attribute(1).relation().stringFreeStructure();

  Instances newData = m_Attributes;
  Instance after = new DenseInstance(before.numAttributes());
  Instance noises =  new DenseInstance(before.numAttributes());
  after.setDataset(newData);
  noises.setDataset(newData);

  for(int g=0; g < before.relationalValue(1).numInstances(); g++){
    Instance datum = before.relationalValue(1).instance(g);
    double[] dists = new double[data.numInstances()];

    // distance of this bag instance to every other exemplar; the exemplar
    // itself is excluded by giving it an infinite distance
    for(int i=0; i < data.numInstances(); i++){
      if(i != pos)
        dists[i] = distance(datum, m_Mean[i], m_Variance[i], i);
      else
        dists[i] = Double.POSITIVE_INFINITY;
    }		   

    int[] pred = new int[m_NumClasses];
    for(int n=0; n < pred.length; n++)
      pred[n] = 0;

    // vote among the m_Select nearest exemplars; each chosen exemplar is
    // taken out of the running by overwriting its distance with infinity
    for(int o=0; o<m_Select; o++){
      int index = Utils.minIndex(dists);
      pred[(int)m_Class[index]]++;
      dists[index] = Double.POSITIVE_INFINITY;
    }

    // instances whose vote disagrees with the exemplar's class are noise
    int clas = Utils.maxIndex(pred);
    if((int)before.classValue() != clas)
      noises_relationInsts.add(datum);
    else
      after_relationInsts.add(datum);		
  }

  // assemble both exemplars: attribute 0 is copied from the original,
  // attribute 1 is the new bag, attribute 2 is the class
  int relationValue;
  relationValue = noises.attribute(1).addRelation( noises_relationInsts);
  noises.setValue(0,before.value(0));
  noises.setValue(1, relationValue);
  noises.setValue(2, before.classValue());

  relationValue = after.attribute(1).addRelation( after_relationInsts);
  after.setValue(0,before.value(0));
  after.setValue(1, relationValue);
  after.setValue(2, before.classValue());


  // record mean and variance of the noise bag per dimension; a variance
  // equal to zero is replaced by m_ZERO
  // NOTE(review): writes into m_NoiseM[pos] / m_NoiseV[pos] and therefore
  // assumes both rows were allocated by the caller - confirm.
  if(Utils.gr(noises.relationalValue(1).sumOfWeights(), 0)){	
    for (int i=0; i<m_Dimension; i++) {
      m_NoiseM[pos][i] = noises.relationalValue(1).meanOrMode(i);
      m_NoiseV[pos][i] = noises.relationalValue(1).variance(i);
      if(Utils.eq(m_NoiseV[pos][i],0.0))
        m_NoiseV[pos][i] = m_ZERO;
    }
    /* for(int y=0; y < m_NoiseV[pos].length; y++){
       if(Utils.eq(m_NoiseV[pos][y],0.0))
       m_NoiseV[pos][y] = m_ZERO;
       } */	
  }
  else{
    // no noise found for this exemplar
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
  }

  return after;
}

Source File: CollectiveTree.java From collective-classification-weka-package with GNU General Public License v3.0

4 votes

/**
 * Determines the weight distribution of the instances with a non-missing
 * value at the given attribute position. For a nominal attribute the
 * result has one entry per attribute value; for a numeric attribute it
 * has two entries: values smaller than the median and values greater
 * than or equal to it. Scanning stops at the first instance with a
 * missing value. The result is normalised if its sum is greater than
 * zero.
 *
 * @param data        the instances to work on
 * @param indices     the sorted indices
 * @param att         the attribute to determine the distribution for
 * @return            the distribution
 */
protected double[] determineAttributeDistribution( Instances data,
                                                   int[] indices,
                                                   int att) {
  double[] distribution;

  if (data.attribute(att).isNominal()) {
    // one bucket per nominal value (needed to distribute instances
    // with no class and missing attribute)
    distribution = new double[data.attribute(att).numValues()];
    for (int pos = 0; pos < indices.length; pos++) {
      Instance current = data.instance(indices[pos]);
      if (current.isMissing(att))
        break;
      distribution[(int) current.value(att)] += current.weight();
    }
  }
  else {
    // two buckets: below the median / at or above the median
    distribution = new double[2];

    // length of the leading run of instances with a known value
    int known = 0;
    while (known < indices.length
           && !data.instance(indices[known]).isMissing(att))
      known++;

    // collect the known values and take their median
    double[] values = new double[known];
    for (int pos = 0; pos < known; pos++)
      values[pos] = data.instance(indices[pos]).value(att);

    double median;
    switch (values.length) {
      case 0:
        median = 0;
        break;
      case 1:
        median = values[0];
        break;
      default:
        median = Utils.kthSmallestValue(values, values.length / 2);
        break;
    }

    // distribute the instance weights over the two buckets
    for (int pos = 0; pos < known; pos++) {
      Instance current = data.instance(indices[pos]);
      int bucket = Utils.sm(current.value(att), median) ? 0 : 1;
      distribution[bucket] += current.weight();
    }
  }

  if (Utils.gr(Utils.sum(distribution), 0))
    Utils.normalize(distribution);

  return distribution;
}

Source File: MINND.java From tsml with GNU General Public License v3.0

4 votes

/**
   * Use gradient descent to adjust the per-dimension weights stored in
   * m_Change[row] so that the value of target() for that row decreases.
   * Steps are taken until the improvement of the target value is no
   * longer greater than m_STOP. The step size is m_Rate, or 0.05 when
   * m_Rate is -1; in the latter case a step that makes the target worse
   * is retried with the rate multiplied by m_Decay, otherwise such a
   * step is undone and the search ends.
   * 
   * @param row the given row index
   * @param mean the matrix handed to target() and delta() as the exemplars
   */
  public void findWeights(int row, double[][] mean){

    double[] neww = new double[m_Dimension];
    double[] oldw = new double[m_Dimension];
    // start from the current weights of this row
    System.arraycopy(m_Change[row], 0, neww, 0, m_Dimension);
    //for(int z=0; z<m_Dimension; z++)
    //System.out.println("mu("+row+"): "+origin[z]+" | "+newmu[z]);
    double newresult = target(neww, mean, row, m_Class);
    // infinity makes the first improvement check succeed for a finite target
    double result = Double.POSITIVE_INFINITY;
    double rate= 0.05;
    if(m_Rate != -1)
      rate = m_Rate;
    //System.out.println("???Start searching ...");
search: 
    while(Utils.gr((result-newresult), m_STOP)){ // Full step
      // oldw now refers to the previous weights; neww is a fresh array, so
      // dimensions skipped below keep the default value 0
      oldw = neww;
      neww= new double[m_Dimension];

      double[] delta = delta(oldw, mean, row, m_Class);

      // only dimensions with positive variance are updated
      for(int i=0; i < m_Dimension; i++)
        if(Utils.gr(m_Variance[row][i], 0.0))
          neww[i] = oldw[i] + rate * delta[i];

      result = newresult;
      newresult = target(neww, mean, row, m_Class);

      //System.out.println("???old: "+result+"|new: "+newresult);
      while(Utils.gr(newresult, result)){ // Search back
        //System.out.println("search back");
        if(m_Rate == -1){
          // adaptive rate: shrink the step and redo it from oldw
          // NOTE(review): this loop only ends once the shrunken step stops
          // being worse than the previous result - confirm m_Decay < 1.
          rate *= m_Decay; // Decay
          for(int i=0; i < m_Dimension; i++)
            if(Utils.gr(m_Variance[row][i], 0.0))
              neww[i] = oldw[i] + rate * delta[i];
          newresult = target(neww, mean, row, m_Class);
        }
        else{
          // fixed rate: discard the worsening step and stop searching
          for(int i=0; i < m_Dimension; i++)
            neww[i] = oldw[i];
          break search;
        }
      }
    }
    //System.out.println("???Stop");
    m_Change[row] = neww;
  }

Source File: MultiClassClassifierUpdateable.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Returns the distribution for an instance. With a single base classifier
 * its distribution is returned directly. For the 1-against-1 method the
 * pairwise classifiers either feed pairwise coupling (when enabled and
 * there are more than two classes) or cast one vote each, and the votes
 * are normalised. Every other method is delegated to the superclass.
 * 
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  if (m_Classifiers.length == 1) {
    return m_Classifiers[0].distributionForInstance(inst);
  }

  final int numClasses = inst.numClasses();
  if (m_Method != METHOD_1_AGAINST_1) {
    return super.distributionForInstance(inst);
  }

  final boolean coupled = m_pairwiseCoupling && numClasses > 2;
  double[] votes = new double[numClasses];
  double[][] r = new double[numClasses][numClasses];
  double[][] n = new double[numClasses][numClasses];

  for (int i = 0; i < m_ClassFilters.length; i++) {
    // skip pairs without a classifier or without any training weight
    if (m_Classifiers[i] == null || !(m_SumOfWeights[i] > 0)) {
      continue;
    }

    Instance twoClassInst = (Instance) inst.copy();
    twoClassInst.setDataset(m_TwoClassDataset);
    double[] pairDist = m_Classifiers[i].distributionForInstance(twoClassInst);

    // recover the two class indices this classifier separates
    Range range = new Range(
        ((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
    range.setUpper(m_ClassAttribute.numValues());
    int[] pair = range.getSelection();

    if (coupled) {
      r[pair[0]][pair[1]] = pairDist[0];
      n[pair[0]][pair[1]] = m_SumOfWeights[i];
    } else {
      int winner = (pairDist[0] > pairDist[1]) ? pair[0] : pair[1];
      votes[winner] += 1.0;
    }
  }

  if (coupled) {
    try {
      return pairwiseCoupling(n, r);
    } catch (IllegalArgumentException ex) {
      // coupling failed; fall through and return the vote array below
    }
  }

  if (Utils.gr(Utils.sum(votes), 0)) {
    Utils.normalize(votes);
  }
  return votes;
}

Source File: RuleStats.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Subset description length: <br>
 * S(t,k,p) = -k*log2(p)-(t-k)log2(1-p)
 * <br>
 * The first term is treated as zero when p is not greater than zero.
 *
 * Details see Quinlan: "MDL and categorical theories (Continued)",ML95
 *
 * @param t the number of elements in a known set
 * @param k the number of elements in a subset
 * @param p the expected proportion of subset known by recipient
 * @return the subset description length
 */
public static double subsetDL(double t, double k, double p){
  double length = 0.0;
  if (Utils.gr(p, 0.0)) {
    // cost of identifying the k elements inside the subset
    length = -k * Utils.log2(p);
  }
  // cost of identifying the remaining t - k elements outside the subset
  length -= (t - k) * Utils.log2(1 - p);
  return length;
}

Java Code Examples for weka.core.Utils#gr()