Java Code Examples for weka.core.Instances#numInstances()
The following examples show how to use weka.core.Instances#numInstances().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: XMeans.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Clusters an instance by assigning it to the center with the smallest
 * distance, as measured by {@code m_DistanceF}.
 *
 * @param instance
 *          the instance to assign a cluster to.
 * @param centers
 *          the centers to cluster the instance to.
 * @return the index of the closest center; 0 if {@code centers} holds no
 *         instances or no computed distance compares below infinity.
 */
protected int clusterProcessedInstance(Instance instance, Instances centers) {
  // Seed with +infinity rather than Integer.MAX_VALUE: distances are doubles
  // and may legitimately exceed the int range, in which case the old seed
  // prevented any center from ever being selected.
  double minDist = Double.POSITIVE_INFINITY;
  int bestCluster = 0;
  for (int i = 0; i < centers.numInstances(); i++) {
    double dist = m_DistanceF.distance(instance, centers.instance(i));
    if (dist < minDist) {
      minDist = dist;
      bestCluster = i;
    }
  }
  return bestCluster;
}
Example 2
Source File: CitationKNN.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Checks whether the instances of two exemplars are equal.
 * The bags are read from the relational attribute at index 1. They are
 * considered equal when they contain the same number of instances and every
 * pair of instances at the same position has identical values for each
 * attribute of the first bag's instance.
 *
 * @param exemplar1 first exemplar
 * @param exemplar2 second exemplar
 * @return true if the instances of the exemplars are equal, false otherwise
 */
public boolean equalExemplars(Instance exemplar1, Instance exemplar2) {
  // Bags of different size can never match.
  if (exemplar1.relationalValue(1).numInstances() != exemplar2.relationalValue(1).numInstances()) {
    return false;
  }

  Instances bagA = exemplar1.relationalValue(1);
  Instances bagB = exemplar2.relationalValue(1);
  int row = 0;
  while (row < bagA.numInstances()) {
    Instance left = bagA.instance(row);
    Instance right = bagB.instance(row);
    for (int col = 0; col < left.numAttributes(); col++) {
      // Plain double comparison with != decides inequality.
      if (left.value(col) != right.value(col)) {
        return false;
      }
    }
    row++;
  }
  return true;
}
Example 3
Source File: RuleStats.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Static utility function that filters out the data covered by any of the
 * rules located after the given index in the ruleset. The instances that
 * none of those successive rules cover are collected into a new dataset
 * sharing the header of the input.
 *
 * @param data the data to be processed
 * @param rules the ruleset
 * @param index the given index; only rules at positions greater than this
 *        index are consulted
 * @return the data not covered by the successive rules
 */
public static Instances rmCoveredBySuccessives(Instances data, FastVector rules, int index) {
  Instances uncovered = new Instances(data, 0);
  final int firstRule = index + 1;

  nextDatum:
  for (int d = 0; d < data.numInstances(); d++) {
    Instance datum = data.instance(d);
    for (int r = firstRule; r < rules.size(); r++) {
      Rule candidate = (Rule) rules.elementAt(r);
      if (candidate.covers(datum)) {
        // Covered by a later rule: drop it and move on to the next datum.
        continue nextDatum;
      }
    }
    uncovered.add(datum);
  }
  return uncovered;
}
Example 4
Source File: C45PruneableClassifierTreeG.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Initializes variables for grafting. * sets up limits array (for numeric attributes) and calls * the recursive function traverseTree. * * @param data the data for the tree * @throws Exception if anything goes wrong */ public void doGrafting(Instances data) throws Exception { // 2d array for the limits double [][] limits = new double[data.numAttributes()][2]; // 2nd dimension: index 0 == lower limit, index 1 == upper limit // initialise to no limit for(int i = 0; i < data.numAttributes(); i++) { limits[i][0] = Double.NEGATIVE_INFINITY; limits[i][1] = Double.POSITIVE_INFINITY; } // use an index instead of creating new Insances objects all the time // instanceIndex[0] == array for weights at leaf // instanceIndex[1] == array for weights in atbop double [][] instanceIndex = new double[2][data.numInstances()]; // initialize the weight for each instance for(int x = 0; x < data.numInstances(); x++) { instanceIndex[0][x] = 1; instanceIndex[1][x] = 1; // leaf instances are in atbop } // first call to graft traverseTree(data, instanceIndex, limits, this, 0, -1); }
Example 5
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Copies the attribute values of a dataset into a 2D array with one row per
 * instance and one column per attribute.
 *
 * @param ds the dataset to convert
 * @param removeLastVal if true, the last attribute of every instance is
 *        left out of the result
 * @return the values as {@code [instance][attribute]}
 */
public static double[][] fromWekaInstancesArray(Instances ds, boolean removeLastVal) {
  int numFeatures = ds.numAttributes();
  if (removeLastVal) {
    numFeatures -= 1;
  }
  double[][] data = new double[ds.numInstances()][numFeatures];

  for (int row = 0; row < data.length; row++) {
    double[] target = data[row];
    for (int col = 0; col < target.length; col++) {
      target[col] = ds.get(row).value(col);
    }
  }
  return data;
}
Example 6
Source File: SAXVSM.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Leave-one-out CV without re-doing bop transformation every fold (still re-applying tfxidf) * * @return cv accuracy */ private double crossValidate(Instances data) throws Exception { transformedData = bop.process(data); double correct = 0; for (int i = 0; i < data.numInstances(); ++i) { corpus = tfxidf(transformedData, i); //apply tfxidf while ignoring BOP bag i if (classifyInstance(data.get(i)) == data.get(i).classValue()) ++correct; } return correct / data.numInstances(); }
Example 7
Source File: MultiInstanceToPropositional.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Sets the format of the input instances and derives the propositional
 * output format from it: the bag-index attribute (index 0), followed by the
 * attributes of the relational attribute (index 1), followed by the class
 * attribute.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * only used for counting bags and bag members here).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set
 * successfully, or attribute 1 is not relational
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  if (instanceInfo.attribute(1).type() != Attribute.RELATIONAL) {
    throw new Exception("Can only handle relational-valued attribute!");
  }
  super.setInputFormat(instanceInfo);

  // count the bags and the instances contained in all bags
  m_NumBags = instanceInfo.numInstances();
  m_NumInstances = 0;
  for (int bag = 0; bag < m_NumBags; bag++) {
    Instances members = instanceInfo.instance(bag).relationalValue(1);
    m_NumInstances += members.numInstances();
  }

  Attribute classAttribute = (Attribute) instanceInfo.classAttribute().copy();
  Attribute bagIndex = (Attribute) instanceInfo.attribute(0).copy();
  Instances bagHeader = instanceInfo.attribute(1).relation();

  /* create a new output format (propositional instance format) */
  Instances newData = bagHeader.stringFreeStructure();
  newData.insertAttributeAt(bagIndex, 0);
  newData.insertAttributeAt(classAttribute, newData.numAttributes());
  newData.setClassIndex(newData.numAttributes() - 1);

  super.setOutputFormat(newData.stringFreeStructure());

  m_BagStringAtts = new StringLocator(bagHeader.stringFreeStructure());
  m_BagRelAtts = new RelationalLocator(bagHeader.stringFreeStructure());

  return true;
}
Example 8
Source File: MergeNominalValues.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Processes the given data. * * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong */ @Override public Instances process(Instances instances) throws Exception { // Generate the output and return it Instances result = new Instances(getOutputFormat(), instances.numInstances()); for (int i = 0; i < instances.numInstances(); i++) { Instance inst = instances.instance(i); double[] newData = new double[instances.numAttributes()]; for (int j = 0; j < instances.numAttributes(); j++) { if (m_AttToBeModified[j] && !inst.isMissing(j)) { newData[j] = m_Indicators[j][(int) inst.value(j)]; } else { newData[j] = inst.value(j); } } DenseInstance instNew = new DenseInstance(1.0, newData); instNew.setDataset(result); // copy possible strings, relational values... copyValues(instNew, false, inst.dataset(), getOutputFormat()); // Add instance to output result.add(instNew); } return result; }
Example 9
Source File: PowerCepstrum.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Replaces, in place, every non-class attribute value of every instance by
 * its natural logarithm.
 *
 * @param out the dataset whose values are transformed
 */
public void logDataSet(Instances out) {
  int row = 0;
  while (row < out.numInstances()) {
    Instance current = out.instance(row);
    for (int col = 0; col < current.numAttributes(); col++) {
      if (col == current.classIndex()) {
        continue; // leave the class value untouched
      }
      double logged = Math.log(current.value(col));
      current.setValue(col, logged);
    }
    row++;
  }
}
Example 10
Source File: StatUtils.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h', under random seed 'r'.
 * <br>
 * A copy of D is shuffled, the first 60% is used to train BR and the
 * remainder to test it; the evaluation result is handed to LEAD2.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC -- on the other hand the original BCC paper did not use LEAD, so don't worry.
 *
 * @param D the dataset (not modified; a copy is randomized)
 * @param h the base classifier for BR
 * @param r the random number generator used for shuffling
 * @return the matrix returned by LEAD2 on the test split
 * @throws Exception if evaluation fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
  Instances shuffled = new Instances(D);
  shuffled.randomize(r);

  // 60/40 train/test split of the shuffled copy
  int trainSize = shuffled.numInstances() * 60 / 100;
  Instances D_train = new Instances(shuffled, 0, trainSize);
  int testStart = D_train.numInstances();
  int testSize = shuffled.numInstances() - D_train.numInstances();
  Instances D_test = new Instances(shuffled, testStart, testSize);

  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, D_train, D_test, "PCut1", "1");

  return LEAD2(D_test, result);
}
Example 11
Source File: DD_DTW.java From tsml with GNU General Public License v3.0 | 5 votes |
public static void main(String[] args){ // option 1: simple example of the classifier // option 2: recreate the results from the original published work int option = 1; try{ if(option==1){ String dataName = "ItalyPowerDemand"; Instances train = DatasetLoading.loadDataNullable(DATA_DIR+dataName+"/"+dataName+"_TRAIN"); Instances test = DatasetLoading.loadDataNullable(DATA_DIR+dataName+"/"+dataName+"_TEST"); // create the classifier, using DTW as the distance function as an example DD_DTW nndw = new DD_DTW(DistanceType.DTW);; // params a and b have not been explicitly set, so buildClassifier will cv to find them nndw.buildClassifier(train); int correct = 0; for(int i = 0; i < test.numInstances(); i++){ if(nndw.classifyInstance(test.instance(i))==test.instance(i).classValue()){ correct++; } } System.out.println(dataName+":\t"+new DecimalFormat("#.###").format((double)correct/test.numInstances()*100)+"%"); }else if(option==2){ recreateResultsTable(); } }catch(Exception e){ e.printStackTrace(); } }
Example 12
Source File: OnlineCachedShapeletDistance.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public void init(Instances dataInst) { stats = new Stats(); //Normalise all time series for further processing int dataSize = dataInst.numInstances(); data = new double[dataSize][]; for (int i = 0; i < dataSize; i++) { data[i] = seriesRescaler.rescaleSeries(dataInst.get(i).toDoubleArray(), true); } }
Example 13
Source File: StatUtils.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * GetP - Get a pairwise empirical joint-probability matrix P[][] from dataset D.
 * <br>
 * The diagonal entry [j][j] is p(D,j,1) and, for k &gt; j, entry [j][k] is
 * P(D,j,1,k,1). Only the diagonal and the upper triangle are filled; entries
 * below the diagonal keep their default value of 0.
 * <br>
 * NOTE multi-label only
 *
 * @param D the dataset; its class index is used as the matrix dimension
 * @return an L x L matrix, where L is {@code D.classIndex()}
 */
public static double[][] getP(Instances D) {
  // The former local 'N' (instance count) was never read and has been removed.
  int L = D.classIndex();
  double[][] joint = new double[L][L];
  for (int j = 0; j < L; j++) {
    joint[j][j] = p(D, j, 1);
    for (int k = j + 1; k < L; k++) {
      joint[j][k] = P(D, j, 1, k, 1);
    }
  }
  return joint;
}
Example 14
Source File: CitationKNN.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Build the list of nearest k neighbors to the given test instance. * @param bag the bag to search for neighbors of * @param kNN the number of nearest neighbors * @param bags the data * @return a list of neighbors */ protected NeighborList findNeighbors(Instance bag, int kNN, Instances bags){ double distance; int index = 0; if(kNN > bags.numInstances()) kNN = bags.numInstances() - 1; NeighborList neighborList = new NeighborList(kNN); for(int i = 0; i < bags.numInstances(); i++){ if(bag != bags.instance(i)){ // for hold-one-out cross-validation distance = distanceSet(bag, bags.instance(i)) ; //mDistanceSet.distance(bag, mInstances, bags.exemplar(i), mInstances); if(m_NeighborListDebug) System.out.println("distance(bag, " + i + "): " + distance); if(neighborList.isEmpty() || (index < kNN) || (distance <= neighborList.mLast.mDistance)) neighborList.insertSorted(distance, bags.instance(i), i); index++; } } if(m_NeighborListDebug){ System.out.println("bag neighbors:"); neighborList.printReducedList(); } return neighborList; }
Example 15
Source File: ClassifierSplitModel.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Splits the given set of instances into subsets.
 * An instance for which whichSubset yields a valid index goes to that
 * subset only. Otherwise it is added to every subset with a positive
 * weight, and the weight of the added instance is set to that subset
 * weight times the instance weight.
 *
 * @param data the instances to split
 * @return one Instances object per subset
 * @exception Exception if something goes wrong
 */
public final Instances[] split(Instances data) throws Exception {

  Instances[] subsets = new Instances[m_numSubsets];
  for (int s = 0; s < m_numSubsets; s++) {
    subsets[s] = new Instances(data, data.numInstances());
  }

  for (int i = 0; i < data.numInstances(); i++) {
    Instance current = data.instance(i);
    double[] subsetWeights = weights(current);
    int target = whichSubset(current);

    if (target > -1) {
      subsets[target].add(current);
      continue;
    }

    // No single subset: distribute over all subsets with positive weight.
    for (int s = 0; s < m_numSubsets; s++) {
      if (Utils.gr(subsetWeights[s], 0)) {
        double scaledWeight = subsetWeights[s] * current.weight();
        subsets[s].add(current);
        subsets[s].lastInstance().setWeight(scaledWeight);
      }
    }
  }

  for (Instances subset : subsets) {
    subset.compactify();
  }
  return subsets;
}
Example 16
Source File: PowerCepstrum.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { //Set up instances size and format. int length=(fftFilter.findLength(inputFormat)); length/=2; ArrayList<Attribute> atts=new ArrayList<>(); String name; for(int i=0;i<length;i++){ name = "PowerSpectrum_"+i; atts.add(new Attribute(name)); } if(inputFormat.classIndex()>=0){ //Classification set, set class //Get the class values as a fast vector Attribute target =inputFormat.attribute(inputFormat.classIndex()); ArrayList<String> vals=new ArrayList<>(target.numValues()); for(int i=0;i<target.numValues();i++) vals.add(target.value(i)); atts.add(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(),vals)); } Instances result = new Instances("Cepstrum"+inputFormat.relationName(),atts,inputFormat.numInstances()); if(inputFormat.classIndex()>=0) result.setClassIndex(result.numAttributes()-1); return result; }
Example 17
Source File: NSR.java From meka with GNU General Public License v3.0 | 4 votes |
public Instances convertInstances(Instances D, int L) throws Exception { //Gather combinations HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D); if(getDebug()) System.out.println("Found "+distinctCombinations.size()+" unique combinations"); //Prune combinations MLUtils.pruneCountHashMap(distinctCombinations,m_P); if(getDebug()) System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P); // Remove all class attributes Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L)); // Add a new class attribute D_.insertAttributeAt(new Attribute("CLASS", new ArrayList(distinctCombinations.keySet())),0); // create the class attribute D_.setClassIndex(0); //Add class values for (int i = 0; i < D.numInstances(); i++) { String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L)); // add it if(distinctCombinations.containsKey(y)) //if its class value exists D_.instance(i).setClassValue(y); // decomp else if(m_N > 0) { String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N); for (String s : d_subsets) { int w = distinctCombinations.get(s); Instance copy = (Instance)(D_.instance(i)).copy(); copy.setClassValue(s); copy.setWeight(1.0 / d_subsets.length); D_.add(copy); } } } // remove with missing class D_.deleteWithMissingClass(); // keep the header of new dataset for classification m_InstancesTemplate = new Instances(D_, 0); if (getDebug()) System.out.println(""+D_); return D_; }
Example 18
Source File: MIWrapper.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Computes the distribution for a given exemplar * * @param exmp the exemplar for which distribution is computed * @return the distribution * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance exmp) throws Exception { Instances testData = new Instances (exmp.dataset(),0); testData.add(exmp); // convert the training dataset into single-instance dataset m_ConvertToProp.setWeightMethod( new SelectedTag( MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, MultiInstanceToPropositional.TAGS_WEIGHTMETHOD)); testData = Filter.useFilter(testData, m_ConvertToProp); testData.deleteAttributeAt(0); //remove the bag index attribute // Compute the log-probability of the bag double [] distribution = new double[m_NumClasses]; double nI = (double)testData.numInstances(); double [] maxPr = new double [m_NumClasses]; for(int i=0; i<nI; i++){ double[] dist = m_Classifier.distributionForInstance(testData.instance(i)); for(int j=0; j<m_NumClasses; j++){ switch(m_Method){ case TESTMETHOD_ARITHMETIC: distribution[j] += dist[j]/nI; break; case TESTMETHOD_GEOMETRIC: // Avoid 0/1 probability if(dist[j]<0.001) dist[j] = 0.001; else if(dist[j]>0.999) dist[j] = 0.999; distribution[j] += Math.log(dist[j])/nI; break; case TESTMETHOD_MAXPROB: if (dist[j]>maxPr[j]) maxPr[j] = dist[j]; break; } } } if(m_Method == TESTMETHOD_GEOMETRIC) for(int j=0; j<m_NumClasses; j++) distribution[j] = Math.exp(distribution[j]); if(m_Method == TESTMETHOD_MAXPROB){ // for positive bag distribution[1] = maxPr[1]; distribution[0] = 1 - distribution[1]; } if (Utils.eq(Utils.sum(distribution), 0)) { for (int i = 0; i < distribution.length; i++) distribution[i] = 1.0 / (double) distribution.length; } else { Utils.normalize(distribution); } return distribution; }
Example 19
Source File: BoxTidwell.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Iteratively estimates a power (alpha) for one attribute and
 * replaces that attribute's values in {@code data} by value^alpha.
 * <p>
 * Each iteration fits two linear models: one on the attributes with the
 * chosen attribute raised to the current alpha (coefficient b1), and one
 * with an extra term x^alpha * log(x) (coefficient b2). Alpha is then
 * updated by b2/b1. Iteration stops when |b2/b1| drops below 0.1, after 10
 * iterations, or when |alpha| exceeds 10 (in which case alpha is reset to 1).
 *
 * @param data the dataset; modified in place and returned
 * @param pos index of the attribute to transform
 * @param resultPos index in {@code powers} at which the final alpha is stored
 * @param powers output array receiving the estimated power
 * @return the same {@code data} object, with attribute {@code pos} transformed
 */
public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers) {
    //1. Get values of the attribute of interest.
    //Confusingly, am working with attributes in rows not columns
    double[] temp=data.attributeToDoubleArray(pos);
    // originalData is an element-wise copy of temp; logData holds the natural logs
    double[] originalData= new double[temp.length];
    double[] logData= new double[temp.length];
    for(int i=0;i<temp.length;i++) {
        originalData[i]=temp[i];
        logData[i]=Math.log(temp[i]);
    }
    // response values: the class attribute
    double[] y =data.attributeToDoubleArray(data.classIndex());
    // I'm not sure if this is a memory copy or a reference copy, so be safe
    double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()];
    double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()];
    // row 0 of both matrices is all ones (presumably the intercept column -- confirm against LinearModel)
    for(int j=0;j<data.numInstances();j++) {
        transposeFirst[0][j]=transposeSecond[0][j]=1;
    }
    // row i holds attribute i-1; note that both matrices receive the SAME
    // array reference for each row, so a write through one is visible in the other
    for(int i=1;i<data.numAttributes();i++) {
        transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1);
    }
    // Add one to pos cos of the ones
    pos=pos+1;
    // Second has an attribute at the end of data for transform
    int workingPos=data.numAttributes();
    LinearModel l1,l2;
    double alpha=1, b1,b2;
    // threshold on |b2/b1| below which the iteration stops
    double min=0.1;
    boolean finished=false;
    int count=0;
    final int MaxIterations=10;
    // Initialise alpha to 1
    //Find Base SSE
    //While not termination condition
    while(!finished) {
        // System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha);
        //Create new attributes
        //1. Calculate x^alpha
        for(int j=0;j<originalData.length;j++) {
            transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha);
        }
        //2. Fit y=b1+ .. b_pos x^alpha (+ other terms)-> get b_pos
        l1=new LinearModel(transposeFirst,y);
        l1.fitModel();
        //Not necessary:
        // l1.formTrainPredictions();
        // l1.findTrainStatistics();
        // System.out.println(l1+"\nVariance for L1 = "+l1.variance);
        b1=l1.paras[pos];
        //3. Fit y=b*1+ .. b*_pos x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2
        //2. Calculate x^alpha*log(x)
        for(int j=0;j<originalData.length;j++)
            transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j];
        l2=new LinearModel(transposeSecond,y);
        l2.fitModel();
        // Not necessary:
        // l2.formTrainPredictions();
        // l2.findTrainStatistics();
        // System.out.println(l2+"\nVariance for L2 = "+l2.variance);
        b2=l2.paras[workingPos];
        alpha+=b2/b1;
        //Work out change term alpha = b*2/b1+alpha0
        // System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2);
        //Update termination criteria: stop if small change: check notes
        count++;
        if(Math.abs(b2/b1)<min || count>=MaxIterations)
            finished=true;
        else if(Math.abs(alpha)>10) {
            // alpha has left the range [-10, 10]: fall back to alpha = 1 and stop
            alpha=1;
            finished=true;
        }
    }
    //Fix original
    powers[resultPos]=alpha;
    // undo the +1 shift so pos is an attribute index of data again
    pos=pos-1;
    Instance inst;
    // overwrite the attribute in every instance with originalValue^alpha
    for(int i=0;i<data.numInstances();i++) {
        inst=data.instance(i);
        inst.setValue(pos,Math.pow(originalData[i],alpha));
    }
    return data;
}
Example 20
Source File: LexiconDistantSupervision.java From AffectiveTweets with GNU General Public License v3.0 | 4 votes |
@Override protected Instances process(Instances instances) throws Exception { // set upper value for text index m_textIndex.setUpper(instances.numAttributes() - 1); Instances result = getOutputFormat(); // reference to the content of the message, users index start from zero Attribute attrCont = instances.attribute(this.m_textIndex.getIndex()); for (int i = 0; i < instances.numInstances(); i++) { boolean addTweet=false; String content = instances.instance(i).stringValue(attrCont); ArrayList<String> posWords=new ArrayList<String>(); ArrayList<String> negWords=new ArrayList<String>(); this.m_tokenizer.tokenize(content); for(;this.m_tokenizer.hasMoreElements();){ String word=this.m_tokenizer.nextElement(); if(this.lex.getNomDict().containsKey(word)){ String value=this.lex.getNomDict().get(word).get(this.polarityAttName); if(value.equals(this.polarityAttPosValName)) posWords.add(word); else if(value.equals(this.polarityAttNegValName)) negWords.add(word); } } if(posWords.size()>0 && negWords.size()==0){ addTweet=true; // the matching words are removed from the content if flag is set if(this.removeMatchingWord) content=content.replaceAll(patternFromList(posWords), ""); } else if( negWords.size()>0 && posWords.size()==0){ addTweet=true; if(this.removeMatchingWord) content=content.replaceAll(patternFromList(negWords), ""); } if(addTweet){ double[] values = new double[result.numAttributes()]; // copy other attributes for (int n = 0; n < instances.numAttributes(); n++){ if(n!=this.m_textIndex.getIndex()) values[n] = instances.instance(i).value(n); } // add the content values[this.m_textIndex.getIndex()]= attrCont.addStringValue(content); // label tweet according to the word's polarity if(posWords.size()>0) values[result.numAttributes()-1]=1; else values[result.numAttributes()-1]=0; Instance inst = new SparseInstance(1, values); inst.setDataset(result); // copy possible strings, relational values... 
copyValues(inst, false, instances, result); result.add(inst); } } return result; }