Java Code Examples for weka.core.Instances#classIndex()
The following examples show how to use
weka.core.Instances#classIndex().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AddCluster.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Removes the attributes that are to be ignored: everything listed in
 * {@code m_IgnoreAttributesRange} (when it is set) plus the class attribute
 * (when the data has one).
 *
 * @param data the data to filter
 * @return the filtered data, or {@code data} itself when nothing has to be removed
 * @throws Exception if filtering fails
 */
protected Instances removeIgnored(Instances data) throws Exception {
  final boolean hasIgnoreRange = m_IgnoreAttributesRange != null;
  final boolean hasClass = data.classIndex() >= 0;

  // Nothing to strip: hand the input back untouched.
  if (!hasIgnoreRange && !hasClass) {
    return data;
  }

  Remove remover = new Remove();
  m_removeAttributes = remover;

  // Build the comma-separated range string expected by Remove.
  StringBuilder indices = new StringBuilder();
  if (hasIgnoreRange) {
    indices.append(m_IgnoreAttributesRange.getRanges());
  }
  if (hasClass) {
    if (indices.length() > 0) {
      indices.append(",");
    }
    // classIndex() is 0-based; the range string is given as index + 1.
    indices.append(data.classIndex() + 1);
  }

  remover.setAttributeIndices(indices.toString());
  remover.setInvertSelection(false);
  m_removeAttributes.setInputFormat(data);

  return Filter.useFilter(data, m_removeAttributes);
}
Example 2
Source File: NormalizeCase.java From tsml with GNU General Public License v3.0 | 6 votes |
public void standard(Instances r) throws Exception{ double mean,sum,sumSq,stdev,x,y; int size=r.numAttributes(); int classIndex=r.classIndex(); if(classIndex>0) size--; for(int i=0;i<r.numInstances();i++) { sum=sumSq=mean=stdev=0; for(int j=0;j<r.numAttributes();j++){ if(j!=classIndex&& !r.attribute(j).isNominal()){// Ignore all nominal atts{ x=r.instance(i).value(j); sum+=x; } mean=sum/size; } for(int j=0;j<r.numAttributes();j++){ if(j!=classIndex&& !r.attribute(j).isNominal()){// Ignore all nominal atts{ x=r.instance(i).value(j); r.instance(i).setValue(j,(x-mean)); } } } }
Example 3
Source File: AbstractVectorClusterer.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Standardises, in place, every attribute except the last one and records the
 * per-attribute mean and standard deviation in {@code attributeMeans} and
 * {@code attributeStdDevs}.
 *
 * <p>When the reported standard deviation is zero or NaN, values are only
 * centred on the mean instead of being divided, so the column does not turn
 * into NaN/Infinity. The recorded standard deviation is stored as reported.
 *
 * @param data the data to normalise; modified in place
 * @throws Exception if a class attribute is set and it is not the final attribute
 */
protected void normaliseData(Instances data) throws Exception {
  final int lastIndex = data.numAttributes() - 1;
  if (data.classIndex() >= 0 && data.classIndex() != lastIndex) {
    throw new Exception("Class attribute is available and not the final attribute.");
  }

  attributeMeans = new double[lastIndex];
  attributeStdDevs = new double[lastIndex];

  for (int i = 0; i < lastIndex; i++) {
    final double mean = data.attributeStats(i).numericStats.mean;
    final double stdDev = data.attributeStats(i).numericStats.stdDev;
    attributeMeans[i] = mean;
    attributeStdDevs[i] = stdDev;

    // Dividing by 0 or NaN would poison every value of this attribute.
    final boolean scalable = stdDev != 0 && !Double.isNaN(stdDev);

    for (int n = 0; n < data.size(); n++) {
      Instance instance = data.get(n);
      final double centred = instance.value(i) - mean;
      instance.setValue(i, scalable ? centred / stdDev : centred);
    }
  }
}
Example 4
Source File: LabeledItemSet.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Separates the class attribute from the remaining attributes. Works on a copy;
 * the passed-in instances are not modified.
 *
 * @param instances the instances
 * @param invert if true, the copy keeps only the class attribute; otherwise the
 *          class attribute is the only attribute removed from the copy
 * @return a copy without the class attribute, or a copy containing only the
 *         class attribute
 * @throws Exception if no class attribute is set on {@code instances}
 */
public static Instances divide(Instances instances, boolean invert) throws Exception {
  Instances newInstances = new Instances(instances);

  if (instances.classIndex() < 0) {
    throw new Exception(
        "For class association rule mining a class attribute has to be specified.");
  }

  if (!invert) {
    // Unset the class first, then remove that attribute from the copy.
    newInstances.setClassIndex(-1);
    newInstances.deleteAttributeAt(instances.classIndex());
    return newInstances;
  }

  // Delete everything that is not the class. The position only advances when
  // the class attribute is skipped, because a deletion shifts later attributes down.
  int position = 0;
  while (position < newInstances.numAttributes()) {
    if (position == newInstances.classIndex()) {
      position++;
    } else {
      newInstances.deleteAttributeAt(position);
    }
  }
  return newInstances;
}
Example 5
Source File: MLUtils.java From meka with GNU General Public License v3.0 | 6 votes |
/**
 * Get K - the number of distinct values observed for each of the first L
 * attributes, where L is {@code D.classIndex()}.
 *
 * <p>Values are truncated to {@code int} before being counted.
 *
 * @param D a dataset
 * @return a vector of size L: K_1,...,K_L
 */
public int[] getK(Instances D) {
  int L = D.classIndex();
  int K[] = new int[L];
  for (int j = 0; j < L; j++) {
    // One typed set per label replaces the former raw HashSet[] array.
    HashSet<Integer> distinct = new HashSet<Integer>();
    for (Instance x : D) {
      distinct.add((int) x.value(j));
    }
    K[j] = distinct.size();
  }
  return K;
}
Example 6
Source File: StatUtils.java From meka with GNU General Public License v3.0 | 6 votes |
/** * GetApproxP - A fast version of getC(D), based on frequent sets. * Actually, if we don't prune, this is not even approximate -- it is the real empirical P. */ public static int[][] getApproxC(Instances D) { int N = D.numInstances(); int L = D.classIndex(); int C[][] = new int[L][L]; // @todo, can prune here to make even faster by pruning this. HashMap<LabelSet,Integer> map = MLUtils.countCombinationsSparse(D,L); for (LabelSet y : map.keySet()) { int c = map.get(y); for(int j = 0; j < y.indices.length; j++) { int j_ = y.indices[j]; C[j_][j_] += c; for(int k = j+1; k < y.indices.length; k++) { int k_ = y.indices[k]; C[j_][k_] += c; } } } return C; }
Example 7
Source File: MLUtils.java From meka with GNU General Public License v3.0 | 6 votes |
/** * LabelCardinalities - return the frequency of each label of dataset D. */ public static final double[] labelCardinalities(Instances D) { int L = D.classIndex(); double lc[] = new double[L]; for(int j = 0; j < L; j++) { int count = 0; for(int i = 0; i < D.numInstances(); i++) { //if for missing valueses if(!D.instance(i).isMissing(j)){ lc[j] += D.instance(i).value(j); count ++; } } lc[j] /= count; //D.numInstances(); } return lc; }
Example 8
Source File: YeoJohnson.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Overwrites the class value of the first {@code response.length} instances
 * with the result of {@code transform(lambda, response)}.
 *
 * @param data the instances whose class values are replaced; modified in place
 * @param lambda the parameter passed on to {@code transform}
 * @param response the response values to transform, one per instance
 */
public static void transformResponse(Instances data, double lambda, double[] response) {
  final int responsePos = data.classIndex();
  final double[] transformed = transform(lambda, response);

  int row = 0;
  while (row < response.length) {
    data.instance(row).setValue(responsePos, transformed[row]);
    row++;
  }
}
Example 9
Source File: PACF.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { //Check capabilities for the filter. Can only handle real valued, no missing. getCapabilities().testWithFail(inputFormat); seriesLength=inputFormat.numAttributes(); if(inputFormat.classIndex()>=0) seriesLength--; if(maxLag>seriesLength-endTerms) maxLag=seriesLength-endTerms; if(maxLag<0) maxLag=inputFormat.numAttributes()-1; //Set up instances size and format. ArrayList<Attribute> atts=new ArrayList<>(); String name; for(int i=0;i<maxLag;i++){ name = "PACF_"+i; atts.add(new Attribute(name)); } if(inputFormat.classIndex()>=0){ //Classification set, set class //Get the class values Attribute target =inputFormat.attribute(inputFormat.classIndex()); ArrayList<String> vals=new ArrayList<>(target.numValues()); for(int i=0;i<target.numValues();i++) vals.add(target.value(i)); atts.add(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(),vals)); } Instances result = new Instances("PACF"+inputFormat.relationName(),atts,inputFormat.numInstances()); if(inputFormat.classIndex()>=0) result.setClassIndex(result.numAttributes()-1); return result; }
Example 10
Source File: ACF.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Truncates all cases by deleting every attribute from position {@code n}
 * onwards, except the class attribute, which is skipped over and kept.
 *
 * @param d the data to truncate; modified in place
 * @param n the first attribute position to delete from
 */
public void truncate(Instances d, int n) {
  // No increment in the loop header: a deletion shifts later attributes down,
  // so the position only advances when the class attribute is skipped.
  for (int position = n; position < d.numAttributes(); ) {
    if (position != d.classIndex()) {
      d.deleteAttributeAt(position);
    } else {
      position++;
    }
  }
}
Example 11
Source File: CCp.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the classifier: tests capabilities, prepares the chain for
 * {@code D.classIndex()} labels and creates the root link from the
 * retrieved chain. Prints the chain delimiters when debugging is on.
 *
 * @param D the training data
 * @throws Exception if any of the called steps fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {
  testCapabilities(D);

  prepareChain(D.classIndex());

  if (getDebug()) {
    System.out.print(":- Chain (");
  }

  root = new meka.classifiers.multitarget.CCp.Link(retrieveChain(), 0, D);

  if (getDebug()) {
    System.out.println(" ) -:");
  }
}
Example 12
Source File: RnnTextFilesEmbeddingInstanceIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a sentence provider from the given data. For every instance the
 * class value (as a string) becomes the label, and the string value of
 * attribute {@code 1 - classIndex} is resolved against {@code textsLocation}
 * to obtain the file.
 *
 * @param data the instances to read labels and file paths from
 * @return a provider over the collected files and labels
 */
@Override
public LabeledSentenceProvider getSentenceProvider(Instances data) {
  final int clsIdx = data.classIndex();
  // The path is read from attribute 1 - clsIdx.
  final int pathIdx = 1 - clsIdx;

  List<File> files = new ArrayList<>();
  List<String> labels = new ArrayList<>();

  for (Instance inst : data) {
    final String label = String.valueOf(inst.value(clsIdx));
    labels.add(label);

    final String relativePath = inst.stringValue(pathIdx);
    files.add(Paths.get(textsLocation.getAbsolutePath(), relativePath).toFile());
  }

  return new FileLabeledSentenceProvider(files, labels, data.numClasses());
}
Example 13
Source File: RegOptimizer.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * initializes the algorithm * * @param data the data to work with * @throws Exception if m_SVM is null */ protected void init(Instances data) throws Exception { if (m_SVM == null) { throw new Exception ("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()"); } m_C = m_SVM.getC(); m_data = data; m_classIndex = data.classIndex(); m_nInstances = data.numInstances(); // Initialize kernel m_kernel = Kernel.makeCopy(m_SVM.getKernel()); m_kernel.buildKernel(data); //init m_target m_target = new double[m_nInstances]; for (int i = 0; i < m_nInstances; i++) { m_target[i] = data.instance(i).classValue(); } m_random = new Random(m_nSeed); // initialize alpha and alpha* array to all zero m_alpha = new double[m_target.length]; m_alphaStar = new double[m_target.length]; m_supportVectors = new SMOset(m_nInstances); m_b = 0.0; m_nEvals = 0; m_nCacheHits = -1; }
Example 14
Source File: WARAM.java From meka with GNU General Public License v3.0 | 4 votes |
/** * Generates the classifier. * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated * successfully */ public void buildClassifier(Instances D) throws Exception { // swap attributes to fit MEKA testCapabilities(D); int L = D.classIndex(); int featlength = (D.numAttributes() -L)*2; int numSamples = D.numInstances(); int classlength = L * 2; System.out.println("Using rho="+roa); if (numFeatures==-1){ initARAM( featlength,classlength ,roa , threshold ); }else{ if (featlength != numFeatures) { return ; } if (classlength != numClasses) { return ; }} // Copy the instances so we don't mess up the original data. // Function calls do not deep copy the arguments.. //Instances m_Instances = new Instances(instances); // Use the enumeration of instances to train classifier. // Do any sanity checks (e.g., missing attributes etc here // before calling updateClassifier for the actual learning Enumeration enumInsts = D.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = (Instance) enumInsts.nextElement(); updateClassifier(instance); } System.out.println("Training done, used "+numCategories+" neurons."); // Alternatively, you can put the training logic within this method, // rather than updateClassifier(...). However, if you omit the // updateClassifier(...) method, you should remove // UpdateableClassifier from the class declaration above. }
Example 15
Source File: RandomSubSpace.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * builds the classifier. * * @param data the training data to be used for generating the * classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class m_data = new Instances(data); m_data.deleteWithMissingClass(); // only class? -> build ZeroR model if (m_data.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(m_data); return; } else { m_ZeroR = null; } super.buildClassifier(data); Integer[] indices = new Integer[data.numAttributes()-1]; int classIndex = data.classIndex(); int offset = 0; for(int i = 0; i < indices.length+1; i++) { if (i != classIndex) { indices[offset++] = i+1; } } int subSpaceSize = numberOfAttributes(indices.length, getSubSpaceSize()); Random random = data.getRandomNumberGenerator(m_Seed); for (int j = 0; j < m_Classifiers.length; j++) { if (m_Classifier instanceof Randomizable) { ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); } FilteredClassifier fc = new FilteredClassifier(); fc.setClassifier(m_Classifiers[j]); m_Classifiers[j] = fc; Remove rm = new Remove(); rm.setOptions(new String[]{"-V", "-R", randomSubSpace(indices,subSpaceSize,classIndex+1,random)}); fc.setFilter(rm); // build the classifier //m_Classifiers[j].buildClassifier(m_data); } buildClassifiers(); // save memory m_data = null; }
Example 16
Source File: NormalizeAttribute.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Creates the filter from the given training data: stores the data and its
 * class index, then calls {@code findStats} on it.
 *
 * @param data the training data
 */
public NormalizeAttribute(Instances data) {
  this.trainData = data;
  this.classIndex = data.classIndex();
  // Finds all the stats up front.
  findStats(data);
}
Example 17
Source File: MultilayerPerceptron.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * This function sets what the m_numeric flag to represent the passed class * it also performs the normalization of the attributes if applicable * and sets up the info to normalize the class. (note that regardless of * the options it will fill an array with the range and base, set to * normalize all attributes and the class to be between -1 and 1) * @param inst the instances. * @return The modified instances. This needs to be done. If the attributes * are normalized then deep copies will be made of all the instances which * will need to be passed back out. */ private Instances setClassType(Instances inst) throws Exception { if (inst != null) { // x bounds double min=Double.POSITIVE_INFINITY; double max=Double.NEGATIVE_INFINITY; double value; m_attributeRanges = new double[inst.numAttributes()]; m_attributeBases = new double[inst.numAttributes()]; for (int noa = 0; noa < inst.numAttributes(); noa++) { min = Double.POSITIVE_INFINITY; max = Double.NEGATIVE_INFINITY; for (int i=0; i < inst.numInstances();i++) { if (!inst.instance(i).isMissing(noa)) { value = inst.instance(i).value(noa); if (value < min) { min = value; } if (value > max) { max = value; } } } m_attributeRanges[noa] = (max - min) / 2; m_attributeBases[noa] = (max + min) / 2; if (noa != inst.classIndex() && m_normalizeAttributes) { for (int i = 0; i < inst.numInstances(); i++) { if (m_attributeRanges[noa] != 0) { inst.instance(i).setValue(noa, (inst.instance(i).value(noa) - m_attributeBases[noa]) / m_attributeRanges[noa]); } else { inst.instance(i).setValue(noa, inst.instance(i).value(noa) - m_attributeBases[noa]); } } } } if (inst.classAttribute().isNumeric()) { m_numeric = true; } else { m_numeric = false; } } return inst; }
Example 18
Source File: GeneticSearch.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Searches the attribute subset space using a genetic algorithm. * * @param ASEval the attribute evaluator to guide the search * @param data the training instances. * @return an array (not necessarily ordered) of selected attribute indexes * @throws Exception if the search can't be completed */ public int[] search (ASEvaluation ASEval, Instances data) throws Exception { m_best = null; m_generationReports = new StringBuffer(); if (!(ASEval instanceof SubsetEvaluator)) { throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!"); } if (ASEval instanceof UnsupervisedSubsetEvaluator) { m_hasClass = false; } else { m_hasClass = true; m_classIndex = data.classIndex(); } SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval; m_numAttribs = data.numAttributes(); m_startRange.setUpper(m_numAttribs-1); if (!(getStartSet().equals(""))) { m_starting = m_startRange.getSelection(); } // initial random population m_lookupTable = new Hashtable(m_lookupTableSize); m_random = new Random(m_seed); m_population = new GABitSet [m_popSize]; // set up random initial population initPopulation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); checkBest(); m_generationReports.append(populationReport(0)); boolean converged; for (int i=1;i<=m_maxGenerations;i++) { generation(); evaluatePopulation(ASEvaluator); populationStatistics(); scalePopulation(); // find the best pop member and check for convergence converged = checkBest(); if ((i == m_maxGenerations) || ((i % m_reportFrequency) == 0) || (converged == true)) { m_generationReports.append(populationReport(i)); if (converged == true) { break; } } } return attributeList(m_best.getChromosome()); }
Example 19
Source File: StatUtils.java From meka with GNU General Public License v3.0 | 4 votes |
/** * CondDepMatrix - Get a Conditional Dependency Matrix. * Based on Zhang's 'LEAD' approach, where<br> * the probability of labels j and k both getting errors on the same instance is error(j)*error(k) * if the actual co-occurence is otherwise. * @param D dataset * @return a L*L matrix of Unconditional Depndence. */ public static double[][] condDepMatrix(Instances D, Result result) { int L = D.classIndex(); int N = D.numInstances(); double T[][] = MLUtils.getYfromD(D); // Output (TEACHER) double Y[][] = MatrixUtils.threshold(result.allPredictions(), 0.5); // Output (PREDICTED) result.output = Result.getStats(result,"6"); // <-- high verbosity, because we need individual accuracies double E[] = fillError(result, L); // Errors (EXPECTED) double F[][][] = new double[3][L][L]; // Errors (ACTUAL) // Find the actual co-occurence ... for(int i = 0; i < N; i++) { int y[] = A.toIntArray(Y[i],0.5); // predicted int t[] = A.toIntArray(T[i],0.5); // actual (teacher) for(int j = 0; j < L; j++) { for(int k = j+1; k < L; k++) { if (y[j] != t[j] && y[k] != t[k]) { // if j incorrect and k also ... F[0][j][k]++; // error type 0 } else if (y[j] == t[j] && t[k] == y[k]) { // both are correct F[2][j][k]++; // error type 2 } else { // if only one is correct F[1][j][k]++; // error type 1 } } } } // Un-Normalize with the Expected error double E_norm[][][] = new double[3][L][L]; for(int j = 0; j < L; j++) { for(int k = j+1; k < L; k++) { E_norm[0][j][k] = N * (E[j] * E[k]); E_norm[2][j][k] = N * ((1.0 - E[k]) * (1.0 - E[j])); E_norm[1][j][k] = N * ( (E[j] * (1.0 - E[k])) + (1.0 - E[j]) * E[k]); } } return StatUtils.chi2(F,E_norm); }
Example 20
Source File: LabelTransformationClassifier.java From meka with GNU General Public License v3.0 | 3 votes |
/**
 * Builds the classifier: tests capabilities, transforms the labels of
 * {@code D} with {@code transformLabels} and trains the base classifier on
 * the transformed data.
 *
 * @param D the training data
 * @throws Exception if any of the called steps fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {
  testCapabilities(D);

  final int L = D.classIndex();
  if (getDebug()) {
    System.out.print("transforming labels with size: " + L
        + " baseModel: " + m_Classifier.getClass().getName() + " ");
  }

  m_Classifier.buildClassifier(this.transformLabels(D));
}