Java Code Examples for weka.core.Instances#numAttributes()
The following examples show how to use
weka.core.Instances#numAttributes().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HillClimber.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * find best (or least bad) arc addition operation * * @param bayesNet Bayes network to add arc to * @param instances data set * @param oBestOperation * @return Operation containing best arc to add, or null if no arc addition is allowed * (this can happen if any arc addition introduces a cycle, or all parent sets are filled * up to the maximum nr of parents). * @throws Exception if something goes wrong */ Operation findBestArcToAdd(BayesNet bayesNet, Instances instances, Operation oBestOperation) throws Exception { int nNrOfAtts = instances.numAttributes(); // find best arc to add for (int iAttributeHead = 0; iAttributeHead < nNrOfAtts; iAttributeHead++) { if (bayesNet.getParentSet(iAttributeHead).getNrOfParents() < m_nMaxNrOfParents) { for (int iAttributeTail = 0; iAttributeTail < nNrOfAtts; iAttributeTail++) { if (addArcMakesSense(bayesNet, instances, iAttributeHead, iAttributeTail)) { Operation oOperation = new Operation(iAttributeTail, iAttributeHead, Operation.OPERATION_ADD); double fScore = calcScoreWithExtraParent(oOperation.m_nHead, oOperation.m_nTail); if (fScore > oBestOperation.m_fScore) { if (isNotTabu(oOperation)) { oBestOperation = oOperation; oBestOperation.m_fScore = fScore; } } } } } } return oBestOperation; }
Example 2
Source File: LibLINEAR.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Applies a NominalToBinary filter to the data when at least one non-class
 * attribute is not numeric; otherwise returns the data untouched.
 *
 * @param insts the data to inspect and possibly filter
 * @return the filtered data, or insts itself when every non-class attribute is numeric
 * @throws Exception if setting up or applying the filter fails
 */
private Instances nominalToBinary( Instances insts ) throws Exception {
    boolean foundNonNumeric = false;
    int idx = 0;
    // stop scanning at the first non-numeric, non-class attribute
    while (!foundNonNumeric && idx < insts.numAttributes()) {
        if (idx != insts.classIndex() && !insts.attribute(idx).isNumeric()) {
            foundNonNumeric = true;
        }
        idx++;
    }

    if (!foundNonNumeric) {
        return insts;
    }

    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(insts);
    return Filter.useFilter(insts, m_NominalToBinary);
}
Example 3
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Copies the attribute values of two data sets into one matrix: the train rows
 * first, followed by the test rows.
 *
 * The column count is taken from train; each test row is filled for
 * test.numAttributes() columns.
 *
 * @param train data set whose instances become the leading rows
 * @param test data set whose instances become the trailing rows
 * @return matrix with train.numInstances() + test.numInstances() rows
 */
public static double[][] create2DMatrixFromInstances(Instances train, Instances test) {
    final int trainRows = train.numInstances();
    final int testRows = test.numInstances();
    double[][] matrix = new double[trainRows + testRows][train.numAttributes()];

    for (int row = 0; row < trainRows; row++) {
        for (int col = 0; col < train.numAttributes(); col++) {
            matrix[row][col] = train.get(row).value(col);
        }
    }

    // test rows are appended directly after the train rows
    for (int offset = 0; offset < testRows; offset++) {
        final int row = trainRows + offset;
        for (int col = 0; col < test.numAttributes(); col++) {
            matrix[row][col] = test.get(offset).value(col);
        }
    }
    return matrix;
}
Example 4
Source File: TweetToWordListCountFeatureVector.java From AffectiveTweets with GNU General Public License v3.0 | 6 votes |
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { ArrayList<Attribute> att = new ArrayList<Attribute>(); // Adds all attributes of the inputformat for (int i = 0; i < inputFormat.numAttributes(); i++) { att.add(inputFormat.attribute(i)); } // adds the new attribute att.add(new Attribute("wordListCount")); Instances result = new Instances(inputFormat.relationName(), att, 0); // set the class index result.setClassIndex(inputFormat.classIndex()); return result; }
Example 5
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 6 votes |
public static void removeConstantTrainAttributes(Instances train, Instances test){ int i=0; while(i<train.numAttributes()-1){ //Dont test class // Test if constant int j=1; while(j<train.numInstances() && train.instance(j-1).value(i)==train.instance(j).value(i)) j++; if(j==train.numInstances()){ // Remove from train train.deleteAttributeAt(i); test.deleteAttributeAt(i); // Remove from test }else{ i++; } } }
Example 6
Source File: TunedXGBoost.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public void buildClassifier(Instances insts) throws Exception { // long startTime=System.nanoTime(); long startTime=System.nanoTime(); booster = null; trainResults =new ClassifierResults(); trainInsts = new Instances(insts); numTrainInsts = insts.numInstances(); numAtts = insts.numAttributes(); numClasses = insts.numClasses(); if(cvFolds>numTrainInsts) cvFolds=numTrainInsts; // rng = new Random(seed); //for tie resolution etc if needed buildActualClassifer(); if(getEstimateOwnPerformance()&& !tuneParameters) //if tuneparas, will take the cv results of the best para set trainResults = estimateTrainAcc(trainInsts); if(saveEachParaAcc) trainResults.setBuildTime(combinedBuildTime); else trainResults.setBuildTime(System.nanoTime()-startTime); // trainResults.buildTime=System.nanoTime()-startTime; trainResults.setTimeUnit(TimeUnit.NANOSECONDS); trainResults.setClassifierName(tuneParameters ? "TunedXGBoost" : "XGBoost"); trainResults.setDatasetName(trainInsts.relationName()); trainResults.setParas(getParameters()); }
Example 7
Source File: EnsembleEvaluatorTest.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Downloads a dataset through OpenML, takes a stratified split, builds a
 * ReliefFAttributeEval on the first part and asserts that the mean attribute
 * evaluation is positive.
 *
 * @throws Exception if the download, parsing or evaluation fails
 */
@Test
public void ensembleEvaluatorTest() throws Exception {
    logger.info("Starting cluster evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);

    // close the reader once the data is loaded so the file handle is not leaked
    Instances data;
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);

    List<Instances> split = WekaUtil.getStratifiedSplit(data, 42, .05f);
    Instances insts = split.get(0);

    long timeStart = System.currentTimeMillis();
    ReliefFAttributeEval eval = new ReliefFAttributeEval();
    eval.buildEvaluator(insts);

    // average the evaluation over all attributes of the split
    long timeStartEval = System.currentTimeMillis();
    double attEvalSum = 0;
    for (int i = 0; i < insts.numAttributes(); i++) {
        attEvalSum += eval.evaluateAttribute(i);
    }
    attEvalSum /= insts.numAttributes();

    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.info("Value: " + attEvalSum);
    Assert.assertTrue(attEvalSum > 0);

    // NOTE(review): messages say "Clustering" although an attribute evaluator is timed — confirm wording
    logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
    logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
Example 8
Source File: PAA.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { //Check all attributes are real valued, otherwise throw exception for (int i = 0; i < inputFormat.numAttributes(); i++) { if (inputFormat.classIndex() != i) { if (!inputFormat.attribute(i).isNumeric()) { throw new Exception("Non numeric attribute not allowed for PAA"); } } } //Set up instances size and format. ArrayList<Attribute> attributes = new ArrayList<>(); for (int i = 0; i < numIntervals; i++) attributes.add(new Attribute("PAAInterval_" + i)); if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); ArrayList<String> vals = new ArrayList<>(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.add(target.value(i)); } attributes.add(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("PAA" + inputFormat.relationName(), attributes, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; }
Example 9
Source File: PartitionedMultiFilter.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * determines the indices of unused attributes (ones that are not covered * by any of the range). * * @param data the data to base the determination on * @see #m_IndicesUnused */ protected void determineUnusedIndices(Instances data) { Vector<Integer> indices; int i; int n; boolean covered; // traverse all ranges indices = new Vector<Integer>(); for (i = 0; i < data.numAttributes(); i++) { if (i == data.classIndex()) continue; covered = false; for (n = 0; n < getRanges().length; n++) { if (getRanges()[n].isInRange(i)) { covered = true; break; } } if (!covered) indices.add(new Integer(i)); } // create array m_IndicesUnused = new int[indices.size()]; for (i = 0; i < indices.size(); i++) m_IndicesUnused[i] = indices.get(i).intValue(); if (getDebug()) System.out.println( "Unused indices: " + Utils.arrayToString(m_IndicesUnused)); }
Example 10
Source File: FilteredSubsetEval.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Initializes a filtered attribute evaluator. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); // Structure of original Instances original = new Instances(data, 0); m_filter.setInputFormat(data); data = Filter.useFilter(data, m_filter); // Can only proceed if filter has not altered the order or // number of attributes in the data if (data.numAttributes() != original.numAttributes()) { throw new Exception("Filter must not alter the number of " +"attributes in the data!"); } // Check the class index (if set) if (original.classIndex() >= 0) { if (data.classIndex() != original.classIndex()) { throw new Exception("Filter must not change the class attribute!"); } } // check the order for (int i = 0; i < original.numAttributes(); i++) { if (!data.attribute(i).name().equals(original.attribute(i).name())) { throw new Exception("Filter must not alter the order of the attributes!"); } } // can the evaluator handle this data? ((ASEvaluation)getSubsetEvaluator()).getCapabilities().testWithFail(data); m_filteredInstances = data.stringFreeStructure(); ((ASEvaluation)m_evaluator).buildEvaluator(data); }
Example 11
Source File: ModelFactory.java From AIDR with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates an empty data set named "spec" that has the same attributes as the
 * given data set, with the last attribute set as the class.
 *
 * @param dataSet the data set whose attributes are reused
 * @return an Instances object with zero capacity and the class index on the last attribute
 */
private static Instances getTemplateSet(Instances dataSet) {
    final int attributeCount = dataSet.numAttributes();
    ArrayList<Attribute> copied = new ArrayList<Attribute>(attributeCount);

    int idx = 0;
    while (idx < dataSet.numAttributes()) {
        copied.add(dataSet.attribute(idx));
        idx++;
    }

    Instances specification = new Instances("spec", copied, 0);
    int lastIndex = specification.numAttributes() - 1;
    specification.setClassIndex(lastIndex);
    return specification;
}
Example 12
Source File: RotationForest.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Normalises m_MinGroup and m_MaxGroup: swaps them when the minimum exceeds
 * the maximum, then caps both at the number of attributes minus one.
 *
 * @param data the dataset
 */
protected void checkMinMax(Instances data) {
    // make sure min <= max by exchanging the two values
    if (m_MinGroup > m_MaxGroup) {
        int formerMax = m_MaxGroup;
        int formerMin = m_MinGroup;
        m_MinGroup = formerMax;
        m_MaxGroup = formerMin;
    }

    final int cap = data.numAttributes() - 1;
    if (m_MaxGroup > cap) {
        m_MaxGroup = cap;
    }
    if (m_MinGroup > cap) {
        m_MinGroup = cap;
    }
}
Example 13
Source File: Tools.java From gsn with GNU General Public License v3.0 | 5 votes |
/**
 * Appends an attribute named "interpolate" to the dataset and fills it with the
 * classifier's prediction for each instance. All predictions are computed
 * before the dataset is modified.
 *
 * @param c the classifier
 * @param i the dataset; modified in place
 * @throws Exception if classifying an instance fails
 */
public static void add_predictions(Classifier c, Instances i) throws Exception{
    final int rows = i.numInstances();
    double[] predicted = new double[rows];
    for (int row = 0; row < rows; row++) {
        predicted[row] = c.classifyInstance(i.instance(row));
    }

    // the new attribute goes after the current last one
    final int newIndex = i.numAttributes();
    i.insertAttributeAt(new Attribute("interpolate"), newIndex);

    for (int row = 0; row < rows; row++) {
        i.instance(row).setValue(newIndex, predicted[row]);
    }
}
Example 14
Source File: sIB.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the transpose of the dataset as a matrix: one row per attribute, one
 * column per instance. Only the values each instance stores (iterated through
 * numValues/index/valueSparse) are written; all other cells stay 0.
 *
 * @param data instances with document-term info
 * @return a term-document matrix transposed from the input dataset
 */
private Matrix getTransposedMatrix(Instances data) {
    final int docCount = data.numInstances();
    double[][] termByDoc = new double[data.numAttributes()][docCount];

    for (int doc = 0; doc < docCount; doc++) {
        Instance current = data.instance(doc);
        final int stored = current.numValues();
        for (int pos = 0; pos < stored; pos++) {
            int term = current.index(pos);
            termByDoc[term][doc] = current.valueSparse(pos);
        }
    }

    return new Matrix(termByDoc);
}
Example 15
Source File: LinearModel.java From tsml with GNU General Public License v3.0 | 5 votes |
public LinearModel(Instances data) { //Form X and Y from Instances n=data.numInstances(); m=data.numAttributes(); //includes the constant term y = data.attributeToDoubleArray(data.classIndex()); Y=new Matrix(y,y.length); double[][] xt = new double[m][n]; for(int i=0;i<n;i++) xt[0][i]=1; for(int i=1;i<m;i++) xt[i]=data.attributeToDoubleArray(i-1); Xt=new Matrix(xt); X=Xt.transpose(); }
Example 16
Source File: ARAMNetwork.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances D) throws Exception { int L = D.classIndex(); int featlength = (D.numAttributes() -L)*2; int numSamples = D.numInstances(); int classlength = L * 2; if (this.order==null){ order = new ArrayList<Integer>(); for (int j=0; j<D.numInstances();j++){ order.add(j); } } if (numFeatures==-1){ initARAM( featlength,classlength ,roa , threshold ); }else{ if (featlength != numFeatures) { return ; } if (classlength != numClasses) { return ; }} // Copy the instances so we don't mess up the original data. // Function calls do not deep copy the arguments.. //Instances m_Instances = new Instances(instances); // Use the enumeration of instances to train classifier. // Do any sanity checks (e.g., missing attributes etc here // before calling updateClassifier for the actual learning //Enumeration enumInsts = D.enumerateInstances(); for(int i=0; i<D.numInstances();i++){ Instance instance = D.get(order.get(i)); updateClassifier(instance); } System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+"."); // Alternatively, you can put the training logic within this method, // rather than updateClassifier(...). However, if you omit the // updateClassifier(...) method, you should remove // UpdateableClassifier from the class declaration above. }
Example 17
Source File: ArffLexiconEvaluator.java From AffectiveTweets with GNU General Public License v3.0 | 4 votes |
/** * Processes all the dictionary files. * @throws IOException an IOException will be raised if an invalid file is supplied */ public void processDict() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile)); Instances lexInstances=new Instances(reader); // set upper value for word index lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1); List<Attribute> numericAttributes=new ArrayList<Attribute>(); List<Attribute> nominalAttributes=new ArrayList<Attribute>(); // checks all numeric and nominal attributes and discards the word attribute for(int i=0;i<lexInstances.numAttributes();i++){ if(i!=this.lexiconWordIndex.getIndex()){ if(lexInstances.attribute(i).isNumeric() ){ numericAttributes.add(lexInstances.attribute(i)); // adds the attribute name to the message-level features to be calculated this.featureNames.add(this.lexiconName+"-"+lexInstances.attribute(i).name()); } else if(lexInstances.attribute(i).isNominal() ){ nominalAttributes.add(lexInstances.attribute(i)); // adds the attribute name together with the nominal value to the message-level features to be calculated int numValues=lexInstances.attribute(i).numValues(); for(int j=0;j<numValues;j++) this.featureNames.add(this.lexiconName+"-"+lexInstances.attribute(i).name()+"-"+lexInstances.attribute(i).value(j)); } } } // Maps all words with their affective scores discarding missing values for(Instance inst:lexInstances){ if(inst.attribute(this.lexiconWordIndex.getIndex()).isString()){ String word=inst.stringValue(this.lexiconWordIndex.getIndex()); // stems the word word=this.m_stemmer.stem(word); // map numeric scores if(!numericAttributes.isEmpty()){ Map<String,Double> wordVals=new HashMap<String,Double>(); for(Attribute na:numericAttributes){ if(!weka.core.Utils.isMissingValue(inst.value(na))) wordVals.put(na.name(),inst.value(na)); } this.numDict.put(word, wordVals); } // map nominal associations if(!nominalAttributes.isEmpty()){ Map<String,String> 
wordCounts=new HashMap<String,String>(); for(Attribute no:nominalAttributes){ if(!weka.core.Utils.isMissingValue(inst.value(no))){ wordCounts.put(no.name(),no.value((int) inst.value(no))); } this.nomDict.put(word, wordCounts); } } } } }
Example 18
Source File: WAODE.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } // reset variable m_NumClasses = instances.numClasses(); m_ClassIndex = instances.classIndex(); m_NumAttributes = instances.numAttributes(); m_NumInstances = instances.numInstances(); m_TotalAttValues = 0; // allocate space for attribute reference arrays m_StartAttIndex = new int[m_NumAttributes]; m_NumAttValues = new int[m_NumAttributes]; // set the starting index of each attribute and the number of values for // each attribute and the total number of values for all attributes (not including class). 
for (int i = 0; i < m_NumAttributes; i++) { if (i != m_ClassIndex) { m_StartAttIndex[i] = m_TotalAttValues; m_NumAttValues[i] = instances.attribute(i).numValues(); m_TotalAttValues += m_NumAttValues[i]; } else { m_StartAttIndex[i] = -1; m_NumAttValues[i] = m_NumClasses; } } // allocate space for counts and frequencies m_ClassCounts = new double[m_NumClasses]; m_AttCounts = new double[m_TotalAttValues]; m_AttAttCounts = new double[m_TotalAttValues][m_TotalAttValues]; m_ClassAttAttCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues]; m_Header = new Instances(instances, 0); // Calculate the counts for (int k = 0; k < m_NumInstances; k++) { int classVal=(int)instances.instance(k).classValue(); m_ClassCounts[classVal] ++; int[] attIndex = new int[m_NumAttributes]; for (int i = 0; i < m_NumAttributes; i++) { if (i == m_ClassIndex){ attIndex[i] = -1; } else{ attIndex[i] = m_StartAttIndex[i] + (int)instances.instance(k).value(i); m_AttCounts[attIndex[i]]++; } } for (int Att1 = 0; Att1 < m_NumAttributes; Att1++) { if (attIndex[Att1] == -1) continue; for (int Att2 = 0; Att2 < m_NumAttributes; Att2++) { if ((attIndex[Att2] != -1)) { m_AttAttCounts[attIndex[Att1]][attIndex[Att2]] ++; m_ClassAttAttCounts[classVal][attIndex[Att1]][attIndex[Att2]] ++; } } } } //compute mutual information between each attribute and class m_mutualInformation=new double[m_NumAttributes]; for (int att=0;att<m_NumAttributes;att++){ if (att == m_ClassIndex) continue; m_mutualInformation[att]=mutualInfo(att); } }
Example 19
Source File: ARAMNetworkSparse.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances D) throws Exception { int L = D.classIndex(); int featlength = (D.numAttributes() -L)*2; int numSamples = D.numInstances(); int classlength = L * 2; if (this.order==null){ order = new ArrayList<Integer>(); for (int j=0; j<D.numInstances();j++){ order.add(j); } } if (numFeatures==-1){ initARAM( featlength,classlength ,roa , threshold ); }else{ if (featlength != numFeatures) { return ; } if (classlength != numClasses) { return ; }} // Copy the instances so we don't mess up the original data. // Function calls do not deep copy the arguments.. //Instances m_Instances = new Instances(instances); // Use the enumeration of instances to train classifier. // Do any sanity checks (e.g., missing attributes etc here // before calling updateClassifier for the actual learning for(int i=0; i<D.numInstances();i++){ Instance instance = D.get(order.get(i)); updateClassifier(instance); } System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+"."); // Alternatively, you can put the training logic within this method, // rather than updateClassifier(...). However, if you omit the // updateClassifier(...) method, you should remove // UpdateableClassifier from the class declaration above. }
Example 20
Source File: ArffLexiconWordLabeller.java From AffectiveTweets with GNU General Public License v3.0 | 2 votes |
/** * Processes all the dictionary files. * @throws IOException an IOException will be raised if an invalid file is supplied */ public void processDict() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile)); Instances lexInstances=new Instances(reader); // set upper value for word index lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1); // checks all numeric and nominal attributes and discards the word attribute for(int i=0;i<lexInstances.numAttributes();i++){ if(i!=this.lexiconWordIndex.getIndex()){ if(lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal() ){ this.attributes.add(lexInstances.attribute(i)); } } } // Maps all words with their affective scores discarding missing values for(Instance inst:lexInstances){ if(inst.attribute(this.lexiconWordIndex.getIndex()).isString()){ String word=inst.stringValue(this.lexiconWordIndex.getIndex()); // stems the word word=this.m_stemmer.stem(word); // map numeric scores if(!attributes.isEmpty()){ Map<Attribute,Double> wordVals=new HashMap<Attribute,Double>(); for(Attribute na:attributes){ wordVals.put(na,inst.value(na)); } this.attValMap.put(word, wordVals); } } } }