Java Code Examples for weka.core.Utils#normalize()
The following examples show how to use weka.core.Utils#normalize().
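Before the examples, a quick orientation: weka.core.Utils.normalize(double[]) rescales an array in place so that its elements sum to one, and the two-argument overload Utils.normalize(double[], double) divides by a caller-supplied sum instead. A zero (or NaN) sum cannot be normalized, and Weka signals this with an IllegalArgumentException, which is why many of the examples below either check Utils.sum(...) first or wrap the call in a try/catch. The minimal sketch below illustrates the call pattern; the class name NormalizeDemo and the sample numbers are only for illustration.

import java.util.Arrays;
import weka.core.Utils;

public class NormalizeDemo {
  public static void main(String[] args) {
    // Raw, unnormalized class scores (e.g. summed votes from an ensemble).
    double[] scores = {2.0, 1.0, 1.0};

    // normalize(double[]) scales the array in place so its elements sum to 1.
    Utils.normalize(scores);
    System.out.println(Arrays.toString(scores));   // [0.5, 0.25, 0.25]

    // The two-argument overload divides by a caller-supplied sum instead.
    double[] probs = {0.2, 0.1, 0.1};
    Utils.normalize(probs, 0.4);
    System.out.println(Arrays.toString(probs));    // [0.5, 0.25, 0.25]

    // A zero sum cannot be normalized; Weka throws an IllegalArgumentException,
    // which several of the examples below guard against or catch.
    double[] empty = {0.0, 0.0};
    try {
      Utils.normalize(empty);
    } catch (IllegalArgumentException e) {
      System.out.println("Cannot normalize: " + e.getMessage());
    }
  }
}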
Example 1
Source File: MOA.java (from the moa project, GNU General Public License v3.0), 6 votes

/**
 * Predicts the class memberships for a given instance. If
 * an instance is unclassified, the returned array elements
 * must be all zero. If the class is numeric, the array
 * must consist of only one element, which contains the
 * predicted value.
 *
 * @param instance the instance to be classified
 * @return an array containing the estimated membership
 *         probabilities of the test instance in each class
 *         or the numeric prediction
 * @throws Exception if distribution could not be
 *         computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] result;

  result = m_ActualClassifier.getVotesForInstance(instanceConverter.samoaInstance(instance));

  // ensure that the array has as many elements as there are
  // class values!
  if (result.length < instance.numClasses()) {
    double[] newResult = new double[instance.numClasses()];
    System.arraycopy(result, 0, newResult, 0, result.length);
    result = newResult;
  }

  try {
    Utils.normalize(result);
  } catch (Exception e) {
    result = new double[instance.numClasses()];
  }

  return result;
}
Example 2
Source File: Bagging.java (from the tsml project, GNU General Public License v3.0), 6 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_NumIterations; i++) {
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += m_Classifiers[i].classifyInstance(instance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 3
Source File: END.java (from the tsml project, GNU General Public License v3.0), 6 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_NumIterations; i++) {
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += m_Classifiers[i].classifyInstance(instance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(instance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 4
Source File: MIBoost.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Computes the distribution for a given exemplar
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the classification
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp) throws Exception {

  double[] rt = new double[m_NumClasses];

  Instances insts = new Instances(exmp.dataset(), 0);
  insts.add(exmp);

  // convert the training dataset into single-instance dataset
  insts = Filter.useFilter(insts, m_ConvertToSI);
  insts.deleteAttributeAt(0); // remove the bagIndex attribute

  double n = insts.numInstances();

  if (m_DiscretizeBin > 0)
    insts = Filter.useFilter(insts, m_Filter);

  for (int y = 0; y < n; y++) {
    Instance ins = insts.instance(y);
    for (int x = 0; x < m_NumIterations; x++) {
      rt[(int) m_Models[x].classifyInstance(ins)] += m_Beta[x] / n;
    }
  }

  for (int i = 0; i < rt.length; i++)
    rt[i] = Math.exp(rt[i]);

  Utils.normalize(rt);
  return rt;
}
Example 5
Source File: NaiveBayesMultinomialUpdateable.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probOfClassGivenDoc = new double[m_numClasses];

  // calculate the array of log(Pr[D|C])
  double[] logDocGivenClass = new double[m_numClasses];
  for (int c = 0; c < m_numClasses; c++) {
    logDocGivenClass[c] += Math.log(m_probOfClass[c]);
    int allWords = 0;
    for (int i = 0; i < instance.numValues(); i++) {
      if (instance.index(i) == instance.classIndex())
        continue;
      double frequencies = instance.valueSparse(i);
      allWords += frequencies;
      logDocGivenClass[c] += frequencies * Math.log(m_probOfWordGivenClass[c][instance.index(i)]);
    }
    logDocGivenClass[c] -= allWords * Math.log(m_wordsPerClass[c]);
  }

  double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];
  for (int i = 0; i < m_numClasses; i++)
    probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max);

  Utils.normalize(probOfClassGivenDoc);

  return probOfClassGivenDoc;
}
Example 6
Source File: ContractRotationForest.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

  removeUseless.input(instance);
  instance = removeUseless.output();
  removeUseless.batchFinished();

  normalize.input(instance);
  instance = normalize.output();
  normalize.batchFinished();

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < classifiers.size(); i++) {
    Instance convertedInstance = convertInstance(instance, i);
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += classifiers.get(i).classifyInstance(convertedInstance);
    } else {
      newProbs = classifiers.get(i).distributionForInstance(convertedInstance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) classifiers.size();
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 7
Source File: DynamicWeightedMajority.java (from the moa project, GNU General Public License v3.0), 5 votes

@Override
public double[] getVotesForInstance(Instance inst) {
  double[] Pr = new double[inst.numClasses()];
  for (int i = 0; i < this.experts.size(); i++) {
    double[] pr = this.experts.get(i).getVotesForInstance(inst);
    int yHat = Utils.maxIndex(pr);
    Pr[yHat] += this.weights.get(i);
  } // for
  Utils.normalize(Pr);
  return Pr;
}
Example 8
Source File: Vote.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Classifies a given instance using the selected combination rule.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] result = new double[instance.numClasses()];

  switch (m_CombinationRule) {
    case AVERAGE_RULE:
      result = distributionForInstanceAverage(instance);
      break;
    case PRODUCT_RULE:
      result = distributionForInstanceProduct(instance);
      break;
    case MAJORITY_VOTING_RULE:
      result = distributionForInstanceMajorityVoting(instance);
      break;
    case MIN_RULE:
      result = distributionForInstanceMin(instance);
      break;
    case MAX_RULE:
      result = distributionForInstanceMax(instance);
      break;
    case MEDIAN_RULE:
      result[0] = classifyInstance(instance);
      break;
    default:
      throw new IllegalStateException("Unknown combination rule '" + m_CombinationRule + "'!");
  }

  if (!instance.classAttribute().isNumeric() && (Utils.sum(result) > 0))
    Utils.normalize(result);

  return result;
}
Example 9
Source File: StackingC.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Classifies a given instance using the stacked classifier.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  int[] arrIdc = new int[m_Classifiers.length + 1];
  arrIdc[m_Classifiers.length] = m_MetaFormat.numAttributes() - 1;
  double[] classProbs = new double[m_BaseFormat.numClasses()];
  Instance newInst;
  double sum = 0;

  for (int i = 0; i < m_MetaClassifiers.length; i++) {
    for (int j = 0; j < m_Classifiers.length; j++) {
      arrIdc[j] = m_BaseFormat.numClasses() * j + i;
    }
    m_makeIndicatorFilter.setAttributeIndex("" + (m_MetaFormat.classIndex() + 1));
    m_makeIndicatorFilter.setNumeric(true);
    m_makeIndicatorFilter.setValueIndex(i);
    m_makeIndicatorFilter.setInputFormat(m_MetaFormat);
    m_makeIndicatorFilter.input(metaInstance(instance));
    m_makeIndicatorFilter.batchFinished();
    newInst = m_makeIndicatorFilter.output();

    m_attrFilter.setAttributeIndicesArray(arrIdc);
    m_attrFilter.setInvertSelection(true);
    m_attrFilter.setInputFormat(m_makeIndicatorFilter.getOutputFormat());
    m_attrFilter.input(newInst);
    m_attrFilter.batchFinished();
    newInst = m_attrFilter.output();

    classProbs[i] = m_MetaClassifiers[i].classifyInstance(newInst);
    if (classProbs[i] > 1) {
      classProbs[i] = 1;
    }
    if (classProbs[i] < 0) {
      classProbs[i] = 0;
    }
    sum += classProbs[i];
  }

  if (sum != 0)
    Utils.normalize(classProbs, sum);

  return classProbs;
}
Example 10
Source File: LBR.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @param instanceIndex
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {

  double sumForPriors = 0;
  double sumForCounts = 0;
  int attIndex, AIndex;
  int numClassesOfInstance = instance.numClasses();

  sumForPriors = 0;
  sumForCounts = 0;
  instanceIndex.setSequentialDataset(true);

  // Calculate all of conditional probabilities.
  sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance;
  for (int j = 0; j < numClassesOfInstance; j++) {
    // pointer to counts to make access more efficient in loop
    int[][] countsPointer = m_Counts[j];
    posteriorsArray[j] = (m_Priors[j] + 1) / (sumForPriors);
    for (attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) {
      AIndex = instanceIndex.m_SequentialAttIndexes[attIndex];
      sumForCounts = Utils.sum(countsPointer[AIndex]);
      if (!instance.isMissing(AIndex)) {
        posteriorsArray[j] *= ((countsPointer[AIndex][(int) instance.value(AIndex)] + 1)
          / (sumForCounts + (double) instance.attribute(AIndex).numValues()));
      }
    }
  }

  // Normalize probabilities
  Utils.normalize(posteriorsArray);
  return posteriorsArray;
}
Example 11
Source File: RotationForest.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  m_RemoveUseless.input(instance);
  instance = m_RemoveUseless.output();
  m_RemoveUseless.batchFinished();

  m_Normalize.input(instance);
  instance = m_Normalize.output();
  m_Normalize.batchFinished();

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_Classifiers.length; i++) {
    Instance convertedInstance = convertInstance(instance, i);
    if (instance.classAttribute().isNumeric() == true) {
      sums[0] += m_Classifiers[i].classifyInstance(convertedInstance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(convertedInstance);
      for (int j = 0; j < newProbs.length; j++)
        sums[j] += newProbs[j];
    }
  }
  if (instance.classAttribute().isNumeric() == true) {
    sums[0] /= (double) m_NumIterations;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
Example 12
Source File: HoeffdingTree.java (from the tsml project, GNU General Public License v3.0), 5 votes

/**
 * Returns class probabilities for an instance.
 *
 * @param inst the instance to compute the distribution for
 * @return the class probabilities
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  Attribute classAtt = inst.classAttribute();
  double[] pred = new double[classAtt.numValues()];

  if (m_root != null) {
    LeafNode l = m_root.leafForInstance(inst, null, null);
    HNode actualNode = l.m_theNode;

    if (actualNode == null) {
      actualNode = l.m_parentNode;
    }

    pred = actualNode.getDistribution(inst, classAtt);
  } else {
    // all class values equally likely
    for (int i = 0; i < classAtt.numValues(); i++) {
      pred[i] = 1;
    }
    Utils.normalize(pred);
  }

  // Utils.normalize(pred);

  return pred;
}
Example 13
Source File: RandomTree.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Computes class distribution of an instance using the decision tree.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] returnedDist = null;

  if (m_Attribute > -1) {
    // Node is not a leaf
    if (instance.isMissing(m_Attribute)) {
      // Value is missing
      returnedDist = new double[m_Info.numClasses()];

      // Split instance up
      for (int i = 0; i < m_Successors.length; i++) {
        double[] help = m_Successors[i].distributionForInstance(instance);
        if (help != null) {
          for (int j = 0; j < help.length; j++) {
            returnedDist[j] += m_Prop[i] * help[j];
          }
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      // For nominal attributes
      returnedDist = m_Successors[(int) instance.value(m_Attribute)]
        .distributionForInstance(instance);
    } else {
      // For numeric attributes
      if (instance.value(m_Attribute) < m_SplitPoint) {
        returnedDist = m_Successors[0].distributionForInstance(instance);
      } else {
        returnedDist = m_Successors[1].distributionForInstance(instance);
      }
    }
  }

  // Node is a leaf or successor is empty?
  if ((m_Attribute == -1) || (returnedDist == null)) {

    // Is node empty?
    if (m_ClassDistribution == null) {
      if (getAllowUnclassifiedInstances()) {
        return new double[m_Info.numClasses()];
      } else {
        return null;
      }
    }

    // Else return normalized distribution
    double[] normalizedDistribution = m_ClassDistribution.clone();
    Utils.normalize(normalizedDistribution);
    return normalizedDistribution;
  } else {
    return returnedDist;
  }
}
Example 14
Source File: LPS.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Computes class distribution of an instance using the decision tree.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] returnedDist = null;

  if (m_Attribute > -1) {
    // Node is not a leaf
    if (instance.isMissing(m_Attribute)) {
      // Value is missing
      returnedDist = new double[m_Info.numClasses()];

      // Split instance up
      for (int i = 0; i < m_Successors.length; i++) {
        double[] help = m_Successors[i].distributionForInstance(instance);
        if (help != null) {
          for (int j = 0; j < help.length; j++) {
            returnedDist[j] += m_Prop[i] * help[j];
          }
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      // For nominal attributes
      returnedDist = m_Successors[(int) instance.value(m_Attribute)]
        .distributionForInstance(instance);
    } else {
      // For numeric attributes
      if (instance.value(m_Attribute) < m_SplitPoint) {
        returnedDist = m_Successors[0].distributionForInstance(instance);
      } else {
        returnedDist = m_Successors[1].distributionForInstance(instance);
      }
    }
  }

  // Node is a leaf or successor is empty?
  if ((m_Attribute == -1) || (returnedDist == null)) {
    lastNode = leafNodeID;
    // System.out.println("Setting last node ="+leafNodeID);

    // Is node empty?
    if (m_ClassDistribution == null) {
      if (getAllowUnclassifiedInstances()) {
        double[] result = new double[m_Info.numClasses()];
        if (m_Info.classAttribute().isNumeric()) {
          result[0] = Utils.missingValue();
        }
        return result;
      } else {
        return null;
      }
    }

    // Else return normalized distribution
    double[] normalizedDistribution = m_ClassDistribution.clone();
    if (m_Info.classAttribute().isNominal()) {
      Utils.normalize(normalizedDistribution);
    }
    return normalizedDistribution;
  } else {
    return returnedDist;
  }
}
Example 15
Source File: IBk.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Turn the list of nearest neighbors into a probability distribution.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors
 * @return the probability distribution
 * @throws Exception if computation goes wrong or has no class attribute
 */
protected double[] makeDistribution(Instances neighbours, double[] distances) throws Exception {

  double total = 0, weight;
  double[] distribution = new double[m_NumClasses];

  // Set up a correction to the estimator
  if (m_ClassType == Attribute.NOMINAL) {
    for (int i = 0; i < m_NumClasses; i++) {
      distribution[i] = 1.0 / Math.max(1, m_Train.numInstances());
    }
    total = (double) m_NumClasses / Math.max(1, m_Train.numInstances());
  }

  for (int i = 0; i < neighbours.numInstances(); i++) {
    // Collect class counts
    Instance current = neighbours.instance(i);
    distances[i] = distances[i] * distances[i];
    distances[i] = Math.sqrt(distances[i] / m_NumAttributesUsed);
    switch (m_DistanceWeighting) {
      case WEIGHT_INVERSE:
        weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero
        break;
      case WEIGHT_SIMILARITY:
        weight = 1.0 - distances[i];
        break;
      default:                                 // WEIGHT_NONE:
        weight = 1.0;
        break;
    }
    weight *= current.weight();
    try {
      switch (m_ClassType) {
        case Attribute.NOMINAL:
          distribution[(int) current.classValue()] += weight;
          break;
        case Attribute.NUMERIC:
          distribution[0] += current.classValue() * weight;
          break;
      }
    } catch (Exception ex) {
      throw new Error("Data has no class attribute!");
    }
    total += weight;
  }

  // Normalise distribution
  if (total > 0) {
    Utils.normalize(distribution, total);
  }
  return distribution;
}
Example 16
Source File: DTNB.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Calculates the class membership probabilities for the given
 * test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  DecisionTableHashKey thekey;
  double[] tempDist;
  double[] normDist;

  m_disTransform.input(instance);
  m_disTransform.batchFinished();
  instance = m_disTransform.output();

  m_delTransform.input(instance);
  m_delTransform.batchFinished();
  Instance dtInstance = m_delTransform.output();

  thekey = new DecisionTableHashKey(dtInstance, dtInstance.numAttributes(), false);

  // if this one is not in the table
  if ((tempDist = (double[]) m_entries.get(thekey)) == null) {
    if (m_useIBk) {
      tempDist = m_ibk.distributionForInstance(dtInstance);
    } else {
      // tempDist = new double [m_theInstances.classAttribute().numValues()];
      // tempDist[(int)m_majority] = 1.0;
      tempDist = m_classPriors.clone();
      // return tempDist; ??????
    }
  } else {
    // normalise distribution
    normDist = new double[tempDist.length];
    System.arraycopy(tempDist, 0, normDist, 0, tempDist.length);
    Utils.normalize(normDist);
    tempDist = normDist;
  }

  double[] nbDist = m_NB.distributionForInstance(instance);
  for (int i = 0; i < nbDist.length; i++) {
    tempDist[i] = (Math.log(tempDist[i]) - Math.log(m_classPriors[i]));
    tempDist[i] += Math.log(nbDist[i]);

    /* tempDist[i] *= nbDist[i];
       tempDist[i] /= m_classPriors[i]; */
  }
  tempDist = Utils.logs2probs(tempDist);
  Utils.normalize(tempDist);

  return tempDist;
}
Example 17
Source File: HNB.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Calculates the class membership probabilities for the given test instance
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  // Definition of local variables
  double[] probs = new double[m_NumClasses];
  int sIndex;
  double prob;
  double condiMutualInfoSum;

  // store instance's att values in an int array
  int[] attIndex = new int[m_NumAttributes];
  for (int att = 0; att < m_NumAttributes; att++) {
    if (att == m_ClassIndex)
      attIndex[att] = -1;
    else
      attIndex[att] = m_StartAttIndex[att] + (int) instance.value(att);
  }

  // calculate probabilities for each possible class value
  for (int classVal = 0; classVal < m_NumClasses; classVal++) {
    probs[classVal] = (m_ClassCounts[classVal] + 1.0 / m_NumClasses) / (m_NumInstances + 1.0);
    for (int son = 0; son < m_NumAttributes; son++) {
      if (attIndex[son] == -1) continue;
      sIndex = attIndex[son];
      attIndex[son] = -1;
      prob = 0;
      condiMutualInfoSum = 0;
      for (int parent = 0; parent < m_NumAttributes; parent++) {
        if (attIndex[parent] == -1) continue;
        condiMutualInfoSum += m_condiMutualInfo[son][parent];
        prob += m_condiMutualInfo[son][parent]
          * (m_ClassAttAttCounts[classVal][attIndex[parent]][sIndex] + 1.0 / m_NumAttValues[son])
          / (m_ClassAttAttCounts[classVal][attIndex[parent]][attIndex[parent]] + 1.0);
      }
      if (condiMutualInfoSum > 0) {
        prob = prob / condiMutualInfoSum;
        probs[classVal] *= prob;
      } else {
        prob = (m_ClassAttAttCounts[classVal][sIndex][sIndex] + 1.0 / m_NumAttValues[son])
          / (m_ClassCounts[classVal] + 1.0);
        probs[classVal] *= prob;
      }
      attIndex[son] = sIndex;
    }
  }
  Utils.normalize(probs);
  return probs;
}
Example 18
Source File: CollectiveTree.java (from the collective-classification-weka-package project, GNU General Public License v3.0), 4 votes

/**
 * determines the distribution of the instances with a non-missing value
 * at the given attribute position.
 *
 * @param data the instances to work on
 * @param indices the sorted indices
 * @param att the attribute to determine the distribution for
 * @return the distribution
 */
protected double[] determineAttributeDistribution(Instances data, int[] indices, int att) {
  double[] result;
  int i;
  Instance inst;
  int count;
  double[] values;
  double median;

  // nominal attribute
  if (data.attribute(att).isNominal()) {
    result = new double[data.attribute(att).numValues()];

    // determine attribute distribution (necessary to distribute instances
    // with no class and missing attribute)
    for (i = 0; i < indices.length; i++) {
      inst = data.instance(indices[i]);
      if (inst.isMissing(att))
        break;
      result[(int) inst.value(att)] += inst.weight();
    }
  }
  // numeric attribute
  else {
    result = new double[2];   // less or greater/equal than median

    // determine number of instances w/o missing attribute
    count = 0;
    for (i = 0; i < indices.length; i++) {
      inst = data.instance(indices[i]);
      if (inst.isMissing(att))
        break;
      count++;
    }

    // determine median
    values = new double[count];
    for (i = 0; i < count; i++) {
      inst = data.instance(indices[i]);
      values[i] = inst.value(att);
    }
    if (values.length == 0)
      median = 0;
    else if (values.length == 1)
      median = values[0];
    else
      median = Utils.kthSmallestValue(values, values.length / 2);

    // distribute
    for (i = 0; i < count; i++) {
      inst = data.instance(indices[i]);
      if (Utils.sm(inst.value(att), median))
        result[0] += inst.weight();
      else
        result[1] += inst.weight();
    }
  }

  if (Utils.gr(Utils.sum(result), 0))
    Utils.normalize(result);

  return result;
}
Example 19
Source File: MultiClassClassifierUpdateable.java (from the tsml project, GNU General Public License v3.0), 4 votes

/**
 * Returns the distribution for an instance.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  if (m_Classifiers.length == 1) {
    return m_Classifiers[0].distributionForInstance(inst);
  }

  double[] probs = new double[inst.numClasses()];

  if (m_Method == METHOD_1_AGAINST_1) {
    double[][] r = new double[inst.numClasses()][inst.numClasses()];
    double[][] n = new double[inst.numClasses()][inst.numClasses()];

    for (int i = 0; i < m_ClassFilters.length; i++) {
      if (m_Classifiers[i] != null && m_SumOfWeights[i] > 0) {
        Instance tempInst = (Instance) inst.copy();
        tempInst.setDataset(m_TwoClassDataset);
        double[] current = m_Classifiers[i].distributionForInstance(tempInst);
        Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
        range.setUpper(m_ClassAttribute.numValues());
        int[] pair = range.getSelection();
        if (m_pairwiseCoupling && inst.numClasses() > 2) {
          r[pair[0]][pair[1]] = current[0];
          n[pair[0]][pair[1]] = m_SumOfWeights[i];
        } else {
          if (current[0] > current[1]) {
            probs[pair[0]] += 1.0;
          } else {
            probs[pair[1]] += 1.0;
          }
        }
      }
    }

    if (m_pairwiseCoupling && inst.numClasses() > 2) {
      try {
        return pairwiseCoupling(n, r);
      } catch (IllegalArgumentException ex) {
      }
    }

    if (Utils.gr(Utils.sum(probs), 0)) {
      Utils.normalize(probs);
    }
    return probs;
  } else {
    probs = super.distributionForInstance(inst);
  }

  /*
   * if (probs.length == 1) { // ZeroR made the prediction return new
   * double[m_ClassAttribute.numValues()]; }
   */

  return probs;
}
Example 20
Source File: SimpleKMeansWithSilhouette.java (from the apogen project, Apache License 2.0), 4 votes

/**
 * Initialize using the k-means++ method
 *
 * @param data the training data
 * @throws Exception if a problem occurs
 */
protected void kMeansPlusPlusInit(Instances data) throws Exception {
  Random randomO = new Random(getSeed());
  HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();

  // choose initial center uniformly at random
  int index = randomO.nextInt(data.numInstances());
  m_ClusterCentroids.add(data.instance(index));
  DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index), data.numAttributes(), true);
  initC.put(hk, null);

  int iteration = 0;
  int remainingInstances = data.numInstances() - 1;
  if (m_NumClusters > 1) {
    // proceed with selecting the rest

    // distances to the initial randomly chosen center
    double[] distances = new double[data.numInstances()];
    double[] cumProbs = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
      distances[i] = m_DistanceFunction.distance(data.instance(i), m_ClusterCentroids.instance(iteration));
    }

    // now choose the remaining cluster centers
    for (int i = 1; i < m_NumClusters; i++) {
      // distances converted to probabilities
      double[] weights = new double[data.numInstances()];
      System.arraycopy(distances, 0, weights, 0, distances.length);
      Utils.normalize(weights);

      double sumOfProbs = 0;
      for (int k = 0; k < data.numInstances(); k++) {
        sumOfProbs += weights[k];
        cumProbs[k] = sumOfProbs;
      }
      cumProbs[data.numInstances() - 1] = 1.0; // make sure there are no rounding issues

      // choose a random instance
      double prob = randomO.nextDouble();
      for (int k = 0; k < cumProbs.length; k++) {
        if (prob < cumProbs[k]) {
          Instance candidateCenter = data.instance(k);
          hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true);
          if (!initC.containsKey(hk)) {
            initC.put(hk, null);
            m_ClusterCentroids.add(candidateCenter);
          } else {
            // we shouldn't get here because any instance that is a duplicate of
            // an already chosen cluster center should have zero distance (and hence
            // zero probability of getting chosen) to that center.
            System.err.println("We shouldn't get here....");
          }
          remainingInstances--;
          break;
        }
      }
      iteration++;

      if (remainingInstances == 0) {
        break;
      }

      // prepare to choose the next cluster center.
      // check distances against the new cluster center to see if it is closer
      for (int k = 0; k < data.numInstances(); k++) {
        if (distances[k] > 0) {
          double newDist = m_DistanceFunction.distance(data.instance(k), m_ClusterCentroids.instance(iteration));
          if (newDist < distances[k]) {
            distances[k] = newDist;
          }
        }
      }
    }
  }
}