Java Code Examples for weka.core.Utils#normalize()
The following examples show how to use weka.core.Utils#normalize().
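Before the examples, a quick orientation: weka.core.Utils.normalize(double[]) rescales an array in place so that its elements sum to one, and the two-argument overload Utils.normalize(double[], double) divides by a caller-supplied sum instead. A zero (or NaN) sum cannot be normalized, and Weka signals this with an IllegalArgumentException, which is why many of the examples below either check Utils.sum(...) first or wrap the call in a try/catch. The minimal sketch below illustrates the call pattern; the class name NormalizeDemo and the sample numbers are only for illustration.

import java.util.Arrays;
import weka.core.Utils;

public class NormalizeDemo {
  public static void main(String[] args) {
    // Raw, unnormalized class scores (e.g. summed votes from an ensemble).
    double[] scores = {2.0, 1.0, 1.0};

    // normalize(double[]) scales the array in place so its elements sum to 1.
    Utils.normalize(scores);
    System.out.println(Arrays.toString(scores));   // [0.5, 0.25, 0.25]

    // The two-argument overload divides by a caller-supplied sum instead.
    double[] probs = {0.2, 0.1, 0.1};
    Utils.normalize(probs, 0.4);
    System.out.println(Arrays.toString(probs));    // [0.5, 0.25, 0.25]

    // A zero sum cannot be normalized; Weka throws an IllegalArgumentException,
    // which several of the examples below guard against or catch.
    double[] empty = {0.0, 0.0};
    try {
      Utils.normalize(empty);
    } catch (IllegalArgumentException e) {
      System.out.println("Cannot normalize: " + e.getMessage());
    }
  }
}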
Example 1
Source File: MOA.java (from the moa project, GNU General Public License v3.0), 6 votes

/**
 * Predicts the class memberships for a given instance. If
 * an instance is unclassified, the returned array elements
 * must be all zero. If the class is numeric, the array
 * must consist of only one element, which contains the
 * predicted value.
 *
 * @param instance the instance to be classified
 * @return an array containing the estimated membership
 *         probabilities of the test instance in each class
 *         or the numeric prediction
 * @throws Exception if distribution could not be
 *         computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] result;

  result = m_ActualClassifier.getVotesForInstance(instanceConverter.samoaInstance(instance));

  // ensure that the array has as many elements as there are
  // class values!
  if (result.length < instance.numClasses()) {
    double[] newResult = new double[instance.numClasses()];
    System.arraycopy(result, 0, newResult, 0, result.length);
    result = newResult;
  }

  try {
    Utils.normalize(result);
  } catch (Exception e) {
    result = new double[instance.numClasses()];
  }

  return result;
}
Example 2
Source File: Bagging.java (from the tsml project, GNU General Public License v3.0), 6 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_NumIterations; i++) {
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += m_Classifiers[i].classifyInstance(instance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 3
Source File: END.java (from the tsml project, GNU General Public License v3.0), 6 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_NumIterations; i++) {
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += m_Classifiers[i].classifyInstance(instance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 4
Source File: MIBoost.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Computes the distribution for a given exemplar
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the classification
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp) throws Exception {

  double[] rt = new double[m_NumClasses];

  Instances insts = new Instances(exmp.dataset(), 0);
  insts.add(exmp);

  // convert the training dataset into single-instance dataset
  insts = Filter.useFilter(insts, m_ConvertToSI);
  insts.deleteAttributeAt(0); // remove the bagIndex attribute

  double n = insts.numInstances();

  if (m_DiscretizeBin > 0)
    insts = Filter.useFilter(insts, m_Filter);

  for (int y = 0; y < n; y++) {
    Instance ins = insts.instance(y);
    for (int x = 0; x < m_NumIterations; x++) {
      rt[(int) m_Models[x].classifyInstance(ins)] += m_Beta[x] / n;
    }
  }

  for (int i = 0; i < rt.length; i++)
    rt[i] = Math.exp(rt[i]);

  Utils.normalize(rt);
  return rt;
}
Example 5
Source File: NaiveBayesMultinomialUpdateable.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probOfClassGivenDoc = new double[m_numClasses];

  // calculate the array of log(Pr[D|C])
  double[] logDocGivenClass = new double[m_numClasses];
  for (int c = 0; c < m_numClasses; c++) {
    logDocGivenClass[c] += Math.log(m_probOfClass[c]);
    int allWords = 0;
    for (int i = 0; i < instance.numValues(); i++) {
      if (instance.index(i) == instance.classIndex())
        continue;
      double frequencies = instance.valueSparse(i);
      allWords += frequencies;
      logDocGivenClass[c] += frequencies * Math.log(m_probOfWordGivenClass[c][instance.index(i)]);
    }
    logDocGivenClass[c] -= allWords * Math.log(m_wordsPerClass[c]);
  }

  double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];
  for (int i = 0; i < m_numClasses; i++)
    probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max);

  Utils.normalize(probOfClassGivenDoc);

  return probOfClassGivenDoc;
}
Example 6
Source File: ContractRotationForest.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

  removeUseless.input(instance);
  instance = removeUseless.output();
  removeUseless.batchFinished();

  normalize.input(instance);
  instance = normalize.output();
  normalize.batchFinished();

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < classifiers.size(); i++) {
    Instance convertedInstance = convertInstance(instance, i);
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += classifiers.get(i).classifyInstance(convertedInstance);
    } else {
      newProbs = classifiers.get(i).distributionForInstance(convertedInstance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) classifiers.size();
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 7
Source File: DynamicWeightedMajority.java (from the moa project, GNU General Public License v3.0), 5 votes

@Override
public double[] getVotesForInstance(Instance inst) {
  double[] Pr = new double[inst.numClasses()];
  for (int i = 0; i < this.experts.size(); i++) {
    double[] pr = this.experts.get(i).getVotesForInstance(inst);
    int yHat = Utils.maxIndex(pr);
    Pr[yHat] += this.weights.get(i);
  } // for
  Utils.normalize(Pr);
  return Pr;
}
Example 8
Source File: Vote.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Classifies a given instance using the selected combination rule.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] result = new double[instance.numClasses()];

  switch (m_CombinationRule) {
    case AVERAGE_RULE:
      result = distributionForInstanceAverage(instance);
      break;
    case PRODUCT_RULE:
      result = distributionForInstanceProduct(instance);
      break;
    case MAJORITY_VOTING_RULE:
      result = distributionForInstanceMajorityVoting(instance);
      break;
    case MIN_RULE:
      result = distributionForInstanceMin(instance);
      break;
    case MAX_RULE:
      result = distributionForInstanceMax(instance);
      break;
    case MEDIAN_RULE:
      result[0] = classifyInstance(instance);
      break;
    default:
      throw new IllegalStateException("Unknown combination rule '" + m_CombinationRule + "'!");
  }

  if (!instance.classAttribute().isNumeric() && (Utils.sum(result) > 0))
    Utils.normalize(result);

  return result;
}
Example 9
Source File: StackingC.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Classifies a given instance using the stacked classifier.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  int[] arrIdc = new int[m_Classifiers.length + 1];
  arrIdc[m_Classifiers.length] = m_MetaFormat.numAttributes() - 1;
  double[] classProbs = new double[m_BaseFormat.numClasses()];
  Instance newInst;
  double sum = 0;

  for (int i = 0; i < m_MetaClassifiers.length; i++) {
    for (int j = 0; j < m_Classifiers.length; j++) {
      arrIdc[j] = m_BaseFormat.numClasses() * j + i;
    }
    m_makeIndicatorFilter.setAttributeIndex("" + (m_MetaFormat.classIndex() + 1));
    m_makeIndicatorFilter.setNumeric(true);
    m_makeIndicatorFilter.setValueIndex(i);
    m_makeIndicatorFilter.setInputFormat(m_MetaFormat);
    m_makeIndicatorFilter.input(metaInstance(instance));
    m_makeIndicatorFilter.batchFinished();
    newInst = m_makeIndicatorFilter.output();

    m_attrFilter.setAttributeIndicesArray(arrIdc);
    m_attrFilter.setInvertSelection(true);
    m_attrFilter.setInputFormat(m_makeIndicatorFilter.getOutputFormat());
    m_attrFilter.input(newInst);
    m_attrFilter.batchFinished();
    newInst = m_attrFilter.output();

    classProbs[i] = m_MetaClassifiers[i].classifyInstance(newInst);
    if (classProbs[i] > 1) {
      classProbs[i] = 1;
    }
    if (classProbs[i] < 0) {
      classProbs[i] = 0;
    }
    sum += classProbs[i];
  }

  if (sum != 0)
    Utils.normalize(classProbs, sum);

  return classProbs;
}
Example 10
Source File: LBR.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @param instanceIndex
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {

  double sumForPriors = 0;
  double sumForCounts = 0;
  int attIndex, AIndex;
  int numClassesOfInstance = instance.numClasses();

  sumForPriors = 0;
  sumForCounts = 0;
  instanceIndex.setSequentialDataset(true);

  // Calculate all of conditional probabilities.
  sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance;
  for (int j = 0; j < numClassesOfInstance; j++) {
    // pointer to counts to make access more efficient in loop
    int[][] countsPointer = m_Counts[j];
    posteriorsArray[j] = (m_Priors[j] + 1) / (sumForPriors);
    for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
      AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
      sumForCounts = Utils.sum(countsPointer[AIndex]);
      if (!instance.isMissing(AIndex)) {
        posteriorsArray[j] *= ((countsPointer[AIndex][(int) instance.value(AIndex)] + 1)
          / (sumForCounts + (double) instance.attribute(AIndex).numValues()));
      }
    }
  }

  // Normalize probabilities
  Utils.normalize(posteriorsArray);
  return posteriorsArray;
}
Example 11
Source File: RotationForest.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  m_RemoveUseless.input(instance);
  instance = m_RemoveUseless.output();
  m_RemoveUseless.batchFinished();

  m_Normalize.input(instance);
  instance = m_Normalize.output();
  m_Normalize.batchFinished();

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_Classifiers.length; i++) {
    Instance convertedInstance = convertInstance(instance, i);
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += m_Classifiers[i].classifyInstance(convertedInstance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(convertedInstance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 12
Source File: HoeffdingTree.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Returns class probabilities for an instance.
 *
 * @param inst the instance to compute the distribution for
 * @return the class probabilities
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  Attribute classAtt = inst.classAttribute();
  double[] pred = new double[classAtt.numValues()];

  if (m_root != null) {
    LeafNode l = m_root.leafForInstance(inst, null, null);
    HNode actualNode = l.m_theNode;

    if (actualNode == null) {
      actualNode = l.m_parentNode;
    }

    pred = actualNode.getDistribution(inst, classAtt);
  } else {
    // all class values equally likely
    for (int i = 0; i < classAtt.numValues(); i++) {
      pred[i] = 1;
    }
    Utils.normalize(pred);
  }

  // Utils.normalize(pred);

  return pred;
}
Example 13
Source File: RandomTree.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Computes class distribution of an instance using the decision tree.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] returnedDist = null;

  if (m_Attribute > -1) {
    // Node is not a leaf
    if (instance.isMissing(m_Attribute)) {
      // Value is missing
      returnedDist = new double[m_Info.numClasses()];

      // Split instance up
      for (int i = 0; i < m_Successors.length; i++) {
        double[] help = m_Successors[i].distributionForInstance(instance);
        if (help != null) {
          for (int j = 0; j < help.length; j++) {
            returnedDist[j] += m_Prop[i] * help[j];
          }
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      // For nominal attributes
      returnedDist = m_Successors[(int) instance.value(m_Attribute)]
        .distributionForInstance(instance);
    } else {
      // For numeric attributes
      if (instance.value(m_Attribute) < m_SplitPoint) {
        returnedDist = m_Successors[0].distributionForInstance(instance);
      } else {
        returnedDist = m_Successors[1].distributionForInstance(instance);
      }
    }
  }

  // Node is a leaf or successor is empty?
  if ((m_Attribute == -1) || (returnedDist == null)) {

    // Is node empty?
    if (m_ClassDistribution == null) {
      if (getAllowUnclassifiedInstances()) {
        return new double[m_Info.numClasses()];
      } else {
        return null;
      }
    }

    // Else return normalized distribution
    double[] normalizedDistribution = m_ClassDistribution.clone();
    Utils.normalize(normalizedDistribution);
    return normalizedDistribution;
  } else {
    return returnedDist;
  }
}
Example 14
Source File: LPS.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Computes class distribution of an instance using the decision tree.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] returnedDist = null;

  if (m_Attribute > -1) {
    // Node is not a leaf
    if (instance.isMissing(m_Attribute)) {
      // Value is missing
      returnedDist = new double[m_Info.numClasses()];

      // Split instance up
      for (int i = 0; i < m_Successors.length; i++) {
        double[] help = m_Successors[i].distributionForInstance(instance);
        if (help != null) {
          for (int j = 0; j < help.length; j++) {
            returnedDist[j] += m_Prop[i] * help[j];
          }
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      // For nominal attributes
      returnedDist = m_Successors[(int) instance.value(m_Attribute)]
        .distributionForInstance(instance);
    } else {
      // For numeric attributes
      if (instance.value(m_Attribute) < m_SplitPoint) {
        returnedDist = m_Successors[0].distributionForInstance(instance);
      } else {
        returnedDist = m_Successors[1].distributionForInstance(instance);
      }
    }
  }

  // Node is a leaf or successor is empty?
  if ((m_Attribute == -1) || (returnedDist == null)) {
    lastNode = leafNodeID;
    // System.out.println("Setting last node ="+leafNodeID);

    // Is node empty?
    if (m_ClassDistribution == null) {
      if (getAllowUnclassifiedInstances()) {
        double[] result = new double[m_Info.numClasses()];
        if (m_Info.classAttribute().isNumeric()) {
          result[0] = Utils.missingValue();
        }
        return result;
      } else {
        return null;
      }
    }

    // Else return normalized distribution
    double[] normalizedDistribution = m_ClassDistribution.clone();
    if (m_Info.classAttribute().isNominal()) {
      Utils.normalize(normalizedDistribution);
    }
    return normalizedDistribution;
  } else {
    return returnedDist;
  }
}
Example 15
Source File: IBk.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Turn the list of nearest neighbors into a probability distribution.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors
 * @return the probability distribution
 * @throws Exception if computation goes wrong or has no class attribute
 */
protected double[] makeDistribution(Instances neighbours, double[] distances) throws Exception {

  double total = 0, weight;
  double[] distribution = new double[m_NumClasses];

  // Set up a correction to the estimator
  if (m_ClassType == Attribute.NOMINAL) {
    for (int i = 0; i < m_NumClasses; i++) {
      distribution[i] = 1.0 / Math.max(1, m_Train.numInstances());
    }
    total = (double) m_NumClasses / Math.max(1, m_Train.numInstances());
  }

  for (int i = 0; i < neighbours.numInstances(); i++) {
    // Collect class counts
    Instance current = neighbours.instance(i);
    distances[i] = distances[i] * distances[i];
    distances[i] = Math.sqrt(distances[i] / m_NumAttributesUsed);
    switch (m_DistanceWeighting) {
      case WEIGHT_INVERSE:
        weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero
        break;
      case WEIGHT_SIMILARITY:
        weight = 1.0 - distances[i];
        break;
      default:                                 // WEIGHT_NONE:
        weight = 1.0;
        break;
    }
    weight *= current.weight();
    try {
      switch (m_ClassType) {
        case Attribute.NOMINAL:
          distribution[(int) current.classValue()] += weight;
          break;
        case Attribute.NUMERIC:
          distribution[0] += current.classValue() * weight;
          break;
      }
    } catch (Exception ex) {
      throw new Error("Data has no class attribute!");
    }
    total += weight;
  }

  // Normalise distribution
  if (total > 0) {
    Utils.normalize(distribution, total);
  }
  return distribution;
}
Example 16
Source File: DTNB.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Calculates the class membership probabilities for the given
 * test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  DecisionTableHashKey thekey;
  double[] tempDist;
  double[] normDist;

  m_disTransform.input(instance);
  m_disTransform.batchFinished();
  instance = m_disTransform.output();

  m_delTransform.input(instance);
  m_delTransform.batchFinished();
  Instance dtInstance = m_delTransform.output();

  thekey = new DecisionTableHashKey(dtInstance, dtInstance.numAttributes(), false);

  // if this one is not in the table
  if ((tempDist = (double[]) m_entries.get(thekey)) == null) {
    if (m_useIBk) {
      tempDist = m_ibk.distributionForInstance(dtInstance);
    } else {
      // tempDist = new double [m_theInstances.classAttribute().numValues()];
      // tempDist[(int)m_majority] = 1.0;
      tempDist = m_classPriors.clone();
      // return tempDist; ??????
    }
  } else {
    // normalise distribution
    normDist = new double[tempDist.length];
    System.arraycopy(tempDist, 0, normDist, 0, tempDist.length);
    Utils.normalize(normDist);
    tempDist = normDist;
  }

  double[] nbDist = m_NB.distributionForInstance(instance);
  for (int i = 0; i < nbDist.length; i++) {
    tempDist[i] = (Math.log(tempDist[i]) - Math.log(m_classPriors[i]));
    tempDist[i] += Math.log(nbDist[i]);

    /* tempDist[i] *= nbDist[i];
       tempDist[i] /= m_classPriors[i]; */
  }
  tempDist = Utils.logs2probs(tempDist);
  Utils.normalize(tempDist);

  return tempDist;
}
Example 17
Source File: HNB.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Calculates the class membership probabilities for the given test instance
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  // Definition of local variables
  double[] probs = new double[m_NumClasses];
  int sIndex;
  double prob;
  double condiMutualInfoSum;

  // store instance's att values in an int array
  int[] attIndex = new int[m_NumAttributes];
  for (int att = 0; att < m_NumAttributes; att++) {
    if (att == m_ClassIndex)
      attIndex[att] = -1;
    else
      attIndex[att] = m_StartAttIndex[att] + (int) instance.value(att);
  }

  // calculate probabilities for each possible class value
  for (int classVal = 0; classVal < m_NumClasses; classVal++) {
    probs[classVal] = (m_ClassCounts[classVal] + 1.0 / m_NumClasses) / (m_NumInstances + 1.0);
    for (int son = 0; son < m_NumAttributes; son++) {
      if (attIndex[son] == -1) continue;
      sIndex = attIndex[son];
      attIndex[son] = -1;
      prob = 0;
      condiMutualInfoSum = 0;
      for (int parent = 0; parent < m_NumAttributes; parent++) {
        if (attIndex[parent] == -1) continue;
        condiMutualInfoSum += m_condiMutualInfo[son][parent];
        prob += m_condiMutualInfo[son][parent]
          * (m_ClassAttAttCounts[classVal][attIndex[parent]][sIndex] + 1.0 / m_NumAttValues[son])
          / (m_ClassAttAttCounts[classVal][attIndex[parent]][attIndex[parent]] + 1.0);
      }
      if (condiMutualInfoSum > 0) {
        prob = prob / condiMutualInfoSum;
        probs[classVal] *= prob;
      } else {
        prob = (m_ClassAttAttCounts[classVal][sIndex][sIndex] + 1.0 / m_NumAttValues[son])
          / (m_ClassCounts[classVal] + 1.0);
        probs[classVal] *= prob;
      }
      attIndex[son] = sIndex;
    }
  }
  Utils.normalize(probs);
  return probs;
}
Example 18
Source File: CollectiveTree.java (from the collective-classification-weka-package project, GNU General Public License v3.0), 4 votes

/**
 * determines the distribution of the instances with a non-missing value
 * at the given attribute position.
 *
 * @param data the instances to work on
 * @param indices the sorted indices
 * @param att the attribute to determine the distribution for
 * @return the distribution
 */
protected double[] determineAttributeDistribution(Instances data, int[] indices, int att) {
  double[] result;
  int i;
  Instance inst;
  int count;
  double[] values;
  double median;

  // nominal attribute
  if (data.attribute(att).isNominal()) {
    result = new double[data.attribute(att).numValues()];

    // determine attribute distribution (necessary to distribute instances
    // with no class and missing attribute)
    for (i = 0; i < indices.length; i++) {
      inst = data.instance(indices[i]);
      if (inst.isMissing(att))
        break;
      result[(int) inst.value(att)] += inst.weight();
    }
  }
  // numeric attribute
  else {
    result = new double[2];   // less or greater/equal than median

    // determine number of instances w/o missing attribute
    count = 0;
    for (i = 0; i < indices.length; i++) {
      inst = data.instance(indices[i]);
      if (inst.isMissing(att))
        break;
      count++;
    }

    // determine median
    values = new double[count];
    for (i = 0; i < count; i++) {
      inst = data.instance(indices[i]);
      values[i] = inst.value(att);
    }
    if (values.length == 0)
      median = 0;
    else if (values.length == 1)
      median = values[0];
    else
      median = Utils.kthSmallestValue(values, values.length / 2);

    // distribute
    for (i = 0; i < count; i++) {
      inst = data.instance(indices[i]);
      if (Utils.sm(inst.value(att), median))
        result[0] += inst.weight();
      else
        result[1] += inst.weight();
    }
  }

  if (Utils.gr(Utils.sum(result), 0))
    Utils.normalize(result);

  return result;
}
Example 19
Source File: MultiClassClassifierUpdateable.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Returns the distribution for an instance.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  if (m_Classifiers.length == 1) {
    return m_Classifiers[0].distributionForInstance(inst);
  }

  double[] probs = new double[inst.numClasses()];

  if (m_Method == METHOD_1_AGAINST_1) {
    double[][] r = new double[inst.numClasses()][inst.numClasses()];
    double[][] n = new double[inst.numClasses()][inst.numClasses()];

    for (int i = 0; i < m_ClassFilters.length; i++) {
      if (m_Classifiers[i] != null && m_SumOfWeights[i] > 0) {
        Instance tempInst = (Instance) inst.copy();
        tempInst.setDataset(m_TwoClassDataset);
        double[] current = m_Classifiers[i].distributionForInstance(tempInst);
        Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
        range.setUpper(m_ClassAttribute.numValues());
        int[] pair = range.getSelection();
        if (m_pairwiseCoupling && inst.numClasses() > 2) {
          r[pair[0]][pair[1]] = current[0];
          n[pair[0]][pair[1]] = m_SumOfWeights[i];
        } else {
          if (current[0] > current[1]) {
            probs[pair[0]] += 1.0;
          } else {
            probs[pair[1]] += 1.0;
          }
        }
      }
    }

    if (m_pairwiseCoupling && inst.numClasses() > 2) {
      try {
        return pairwiseCoupling(n, r);
      } catch (IllegalArgumentException ex) {
      }
    }

    if (Utils.gr(Utils.sum(probs), 0)) {
      Utils.normalize(probs);
    }
    return probs;
  } else {
    probs = super.distributionForInstance(inst);
  }

  /*
   * if (probs.length == 1) { // ZeroR made the prediction return new
   * double[m_ClassAttribute.numValues()]; }
   */

  return probs;
}
Example 20
Source File: SimpleKMeansWithSilhouette.java (from the apogen project, Apache License 2.0), 4 votes

/**
 * Initialize using the k-means++ method
 *
 * @param data the training data
 * @throws Exception if a problem occurs
 */
protected void kMeansPlusPlusInit(Instances data) throws Exception {
  Random randomO = new Random(getSeed());
  HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();

  // choose initial center uniformly at random
  int index = randomO.nextInt(data.numInstances());
  m_ClusterCentroids.add(data.instance(index));
  DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index), data.numAttributes(), true);
  initC.put(hk, null);

  int iteration = 0;
  int remainingInstances = data.numInstances() - 1;
  if (m_NumClusters > 1) {
    // proceed with selecting the rest

    // distances to the initial randomly chosen center
    double[] distances = new double[data.numInstances()];
    double[] cumProbs = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
      distances[i] = m_DistanceFunction.distance(data.instance(i), m_ClusterCentroids.instance(iteration));
    }

    // now choose the remaining cluster centers
    for (int i = 1; i < m_NumClusters; i++) {
      // distances converted to probabilities
      double[] weights = new double[data.numInstances()];
      System.arraycopy(distances, 0, weights, 0, distances.length);
      Utils.normalize(weights);

      double sumOfProbs = 0;
      for (int k = 0; k < data.numInstances(); k++) {
        sumOfProbs += weights[k];
        cumProbs[k] = sumOfProbs;
      }
      cumProbs[data.numInstances() - 1] = 1.0; // make sure there are no rounding issues

      // choose a random instance
      double prob = randomO.nextDouble();
      for (int k = 0; k < cumProbs.length; k++) {
        if (prob < cumProbs[k]) {
          Instance candidateCenter = data.instance(k);
          hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true);
          if (!initC.containsKey(hk)) {
            initC.put(hk, null);
            m_ClusterCentroids.add(candidateCenter);
          } else {
            // we shouldn't get here because any instance that is a duplicate of
            // an already chosen cluster center should have zero distance (and hence
            // zero probability of getting chosen) to that center.
            System.err.println("We shouldn't get here....");
          }
          remainingInstances--;
          break;
        }
      }
      iteration++;

      if (remainingInstances == 0) {
        break;
      }

      // prepare to choose the next cluster center.
      // check distances against the new cluster center to see if it is closer
      for (int k = 0; k < data.numInstances(); k++) {
        if (distances[k] > 0) {
          double newDist = m_DistanceFunction.distance(data.instance(k), m_ClusterCentroids.instance(iteration));
          if (newDist < distances[k]) {
            distances[k] = newDist;
          }
        }
      }
    }
  }
}