Java Code Examples for weka.core.Instances#attributeToDoubleArray()
The following examples show how to use
weka.core.Instances#attributeToDoubleArray().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ClusteringUtilities.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the Rand index between a predicted clustering and the class values of a dataset.
 *
 * <p>Every unordered pair of distinct instances is examined exactly once. A pair is an agreement
 * when both labellings place the two instances together, or both place them apart. The result is
 * the fraction of pairs that agree.
 *
 * @param predicted predicted cluster label of each instance, in the same order as {@code inst}
 * @param inst dataset whose class attribute supplies the reference labels
 * @return the proportion of agreeing pairs, or {@code Double.NaN} when there are fewer than two
 *         instances and therefore no pair to compare
 * @throws IllegalArgumentException if {@code predicted} and the class column differ in length
 */
public static double randIndex(int[] predicted, Instances inst) {
    double[] actual = inst.attributeToDoubleArray(inst.classIndex());
    if (predicted.length != actual.length) {
        throw new IllegalArgumentException("predicted has " + predicted.length
                + " labels but the data has " + actual.length + " instances");
    }

    long agreements = 0;
    long pairs = 0;
    for (int i = 0; i < predicted.length; i++) {
        // Start at i + 1: pairing an instance with itself always "agrees" and would inflate the
        // index, and visiting (i, n) as well as (n, i) only repeats the same comparison.
        for (int n = i + 1; n < predicted.length; n++) {
            boolean samePredicted = predicted[i] == predicted[n];
            boolean sameActual = actual[i] == actual[n];
            if (samePredicted == sameActual) {
                agreements++;
            }
            pairs++;
        }
    }
    // Floating-point division so that zero pairs yields NaN instead of an ArithmeticException.
    return (double) agreements / pairs;
}
Example 2
Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Locates the instance of a threshold curve whose threshold value is nearest to a target.
 *
 * <p>Threshold values are read from the last attribute of {@code tcurve}.
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance whose threshold is closest to the target, or -1 when the
 *         relation name does not match, the curve is empty, or the target is below 0 or above 1
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }
  final int size = tcurve.numInstances();
  if (size == 0) {
    return -1;
  }
  if (threshold < 0 || threshold > 1.0) {
    return -1;
  }
  if (size == 1) {
    // A single point is trivially the closest one.
    return 0;
  }

  final int thresholdAttribute = tcurve.numAttributes() - 1;
  final double[] thresholds = tcurve.attributeToDoubleArray(thresholdAttribute);
  final int[] order = Utils.sort(thresholds);
  return binarySearch(order, thresholds, threshold);
}
Example 3
Source File: MultiLinearRegression.java From tsml with GNU General Public License v3.0 | 6 votes |
@Override public void buildClassifier(Instances data) throws Exception { //creating the 2class version of the insts numericClassInsts = new Instances(data); numericClassInsts.setClassIndex(0); //temporary numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes()-1); Attribute newClassAtt = new Attribute("newClassVal"); //numeric class numericClassInsts.insertAttributeAt(newClassAtt, numericClassInsts.numAttributes()); numericClassInsts.setClassIndex(numericClassInsts.numAttributes()-1); //temporary //and building the regressors regressors = new LinearRegression[data.numClasses()]; double[] trueClassVals = data.attributeToDoubleArray(data.classIndex()); for (int c = 0; c < data.numClasses(); c++) { for (int i = 0; i < numericClassInsts.numInstances(); i++) { //if this inst is of the class we're currently handling (c), set new class val to 1 else 0 double cval = trueClassVals[i] == c ? 1 : 0; numericClassInsts.instance(i).setClassValue(cval); } regressors[c] = new LinearRegression(); regressors[c].buildClassifier(numericClassInsts); } }
Example 4
Source File: MultiResponseModelTrees.java From tsml with GNU General Public License v3.0 | 6 votes |
@Override public void buildClassifier(Instances data) throws Exception { //creating the 2class version of the insts numericClassInsts = new Instances(data); numericClassInsts.setClassIndex(0); //temporary numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes()-1); Attribute newClassAtt = new Attribute("newClassVal"); //numeric class numericClassInsts.insertAttributeAt(newClassAtt, numericClassInsts.numAttributes()); numericClassInsts.setClassIndex(numericClassInsts.numAttributes()-1); //temporary //and building the regressors regressors = new M5P[data.numClasses()]; double[] trueClassVals = data.attributeToDoubleArray(data.classIndex()); for (int c = 0; c < data.numClasses(); c++) { for (int i = 0; i < numericClassInsts.numInstances(); i++) { //if this inst is of the class we're currently handling (c), set new class val to 1 else 0 double cval = trueClassVals[i] == c ? 1 : 0; numericClassInsts.instance(i).setClassValue(cval); } regressors[c] = new M5P(); regressors[c].buildClassifier(numericClassInsts); } }
Example 5
Source File: Reciprocal.java From tsml with GNU General Public License v3.0 | 5 votes |
public Instances transform(Instances data){ //Not ideal, should call a method to get this int responsePos=data.numAttributes()-1; double[] response=data.attributeToDoubleArray(responsePos); //Find the min value double min=response[0]; for(int i=0;i<response.length;i++) { if(response[i]<min) min=response[i]; } if(min<=zeroOffset) //Cant take a log of a negative, so offset { offSet=-min+zeroOffset; } else offSet=0; System.out.println(" Min value = "+min+" offset = "+offSet); for(int i=0;i<data.numInstances();i++) { Instance t = data.instance(i); double resp=t.value(responsePos); System.out.print(i+" "+resp); resp=1/(resp+offSet); System.out.println(" "+resp); t.setValue(responsePos,resp); } return data; }
Example 6
Source File: MatrixUtils.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * Helper method that transforms an Instances object into a Matrix object.
 *
 * <p>The matrix has one row per instance and one column per attribute.
 *
 * @param inst The Instances to transform.
 * @return The resulting Matrix object.
 */
public static Matrix instancesToMatrix(Instances inst) {
    final int attributeCount = inst.numAttributes();
    double[][] darr = new double[inst.numInstances()][attributeCount];
    for (int i = 0; i < attributeCount; i++) {
        // Extract each column once: attributeToDoubleArray walks every instance to build its
        // result, so calling it per cell made the conversion quadratic in the instance count.
        double[] column = inst.attributeToDoubleArray(i);
        for (int j = 0; j < column.length; j++) {
            darr[j][i] = column[j];
        }
    }
    return new Matrix(darr);
}
Example 7
Source File: LinearModel.java From tsml with GNU General Public License v3.0 | 5 votes |
public double[] formTestPredictions(Instances testData) { //Form X matrix from testData int rows=testData.numInstances(); int cols=testData.numAttributes(); //includes the constant term predicted=new double[rows]; if(cols!=m) { System.out.println("Error: Mismatch in attribute lengths in form test Train ="+m+" Test ="+cols); System.exit(0); } double[][] xt = new double[cols][rows]; for(int i=0;i<rows;i++) xt[0][i]=1; for(int i=1;i<cols;i++) xt[i]=testData.attributeToDoubleArray(i-1); Matrix testX=new Matrix(xt); testX=testX.transpose(); for(int i=0;i<rows;i++) { //Find predicted predicted[i]=paras[0]; for(int j=1;j<paras.length;j++) predicted[i]+=paras[j]*testX.get(i,j); } return predicted; }
Example 8
Source File: LinearModel.java From tsml with GNU General Public License v3.0 | 5 votes |
public LinearModel(Instances data) { //Form X and Y from Instances n=data.numInstances(); m=data.numAttributes(); //includes the constant term y = data.attributeToDoubleArray(data.classIndex()); Y=new Matrix(y,y.length); double[][] xt = new double[m][n]; for(int i=0;i<n;i++) xt[0][i]=1; for(int i=1;i<m;i++) xt[i]=data.attributeToDoubleArray(i-1); Xt=new Matrix(xt); X=Xt.transpose(); }
Example 9
Source File: YeoJohnson.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Placeholder transform: prints a notice and returns the data untouched.
 *
 * <p>The previous body also copied the last attribute into an array and allocated a second array
 * of the same length, but never used either, so that dead work has been removed.
 *
 * @param data the data to (not) transform
 * @return the same {@code data} object, unmodified
 */
@Override
public Instances transform(Instances data) {
    System.out.println(" Doesnt do anything! ");
    return data;
}
Example 10
Source File: YeoJohnson.java From tsml with GNU General Public License v3.0 | 5 votes |
static public double findBestTransform(Instances data, int pos, double[] power) { int responsePos=data.classIndex(); double[] temp=data.attributeToDoubleArray(responsePos); double[] response=new double[temp.length]; System.arraycopy(temp, 0, response, 0, temp.length); double[] predictions=new double[response.length]; double v; Instance inst; LinearModel lm; double bestLambda=MIN,minError=Double.MAX_VALUE,error; double correlation; for(double lambda=MIN;lambda<=MAX;lambda+=INTERVAL) { //Transform response transformResponse(data,lambda,response); lm=new LinearModel(data); lm.fitModel(); lm.formTrainPredictions(); lm.findTrainStatistics(); //Use the K-S stat for this error=ResidualTests.kolmogorovSmirnoff(lm.stdResidual); correlation=ResidualTests.testHeteroscadisity(lm.y,lm.predicted); if(error<minError) { bestLambda=lambda; minError=error; } // System.out.println(" Lambda ="+lambda+" KS Stat = "+error+" Correlation = "+correlation); } power[pos]=bestLambda; return minError; }
Example 11
Source File: YeoJohnson.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Rewrites the class value of every instance in place using a power formula selected by the sign
 * of the value and by {@code lambda}.
 *
 * <ul>
 *   <li>value &lt; 0, lambda != 2: {@code -((1 - y)^(2 - lambda) - 1) / (2 - lambda)}</li>
 *   <li>value &lt; 0, lambda == 2: {@code -log(1 - y)}</li>
 *   <li>otherwise, lambda == 0: {@code log(1 + y)}</li>
 *   <li>otherwise: {@code ((y + 1)^lambda - 1) / lambda}</li>
 * </ul>
 *
 * <p>NOTE(review): these four expressions have the form of the forward Yeo-Johnson transform,
 * not of its inverse, despite the method name - confirm the intended behaviour.
 *
 * @param data the data whose class attribute is rewritten
 * @param lambda the power parameter
 * @return the same {@code data} object, after modification
 */
static public Instances invertResponse(Instances data, double lambda) {
    final int responsePos = data.classIndex();
    final double[] response = data.attributeToDoubleArray(responsePos);

    for (int i = 0; i < response.length; i++) {
        final Instance target = data.instance(i);
        final double y = response[i];
        final double result;
        if (y < 0 && lambda != 2) {
            result = -(Math.pow((1 - y), 2 - lambda) - 1) / (2 - lambda);
        } else if (y < 0) {
            result = -Math.log(1 - y);
        } else if (lambda == 0) {
            result = Math.log(1 + y);
        } else {
            result = (Math.pow(y + 1, lambda) - 1) / lambda;
        }
        target.setValue(responsePos, result);
    }
    return data;
}
Example 12
Source File: YeoJohnson.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Overwrites the last attribute of every instance with the values produced by
 * {@code invert(bestLambda, values)}, where {@code values} are the current values of that
 * attribute.
 *
 * @param data the data to modify in place; the response is taken to be the last attribute
 * @return the same {@code data} object, after modification
 */
@Override
public Instances invert(Instances data) {
    final int responsePos = data.numAttributes() - 1;
    final double[] current = data.attributeToDoubleArray(responsePos);
    final double[] inverted = invert(bestLambda, current);

    for (int row = 0; row < data.numInstances(); row++) {
        data.instance(row).setValue(responsePos, inverted[row]);
    }
    return data;
}
Example 13
Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Calculates the area under the ROC curve as the Wilcoxon-Mann-Whitney statistic.
 *
 * <p>The first entries of the true-positive and false-positive columns are used as the totals
 * that normalise the area.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @return the ROC area, or Double.NaN if you don't pass in
 *         a ThresholdCurve generated Instances (or it is empty).
 */
public static double getROCArea(Instances tcurve) {
  final int n = tcurve.numInstances();
  if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) {
    return Double.NaN;
  }

  final double[] truePos =
      tcurve.attributeToDoubleArray(tcurve.attribute(TRUE_POS_NAME).index());
  final double[] falsePos =
      tcurve.attributeToDoubleArray(tcurve.attribute(FALSE_POS_NAME).index());
  final double totalPos = truePos[0];
  final double totalNeg = falsePos[0];

  double area = 0.0;
  double negativesSoFar = 0.0;
  final int last = n - 1;

  // Every point except the last contributes the drop in counts to the next point.
  for (int i = 0; i < last; i++) {
    final double posStep = truePos[i] - truePos[i + 1];
    final double negStep = falsePos[i] - falsePos[i + 1];
    area += posStep * (negativesSoFar + (0.5 * negStep));
    negativesSoFar += negStep;
  }

  // The last point contributes whatever counts remain.
  final double posRest = truePos[last];
  final double negRest = falsePos[last];
  area += posRest * (negativesSoFar + (0.5 * negRest));

  return area / (totalNeg * totalPos);
}
Example 14
Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Calculates the area under the precision-recall curve (AUPRC). * * @param tcurve a previously extracted threshold curve Instances. * @return the PRC area, or Double.NaN if you don't pass in * a ThresholdCurve generated Instances. */ public static double getPRCArea(Instances tcurve) { final int n = tcurve.numInstances(); if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) { return Double.NaN; } final int pInd = tcurve.attribute(PRECISION_NAME).index(); final int rInd = tcurve.attribute(RECALL_NAME).index(); final double [] pVals = tcurve.attributeToDoubleArray(pInd); final double [] rVals = tcurve.attributeToDoubleArray(rInd); double area = 0; double xlast = rVals[n - 1]; // start from the first real p/r pair (not the artificial zero point) for (int i = n - 2; i >= 0; i--) { double recallDelta = rVals[i] - xlast; area += (pVals[i] * recallDelta); xlast = rVals[i]; } if (area == 0) { return Utils.missingValue(); } return area; }
Example 15
Source File: InterquartileRange.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * computes the thresholds for outliers and extreme values * * @param instances the data to work on */ protected void computeThresholds(Instances instances) { int i; double[] values; int[] sortedIndices; int half; int quarter; double q1; double q2; double q3; m_UpperExtremeValue = new double[m_AttributeIndices.length]; m_UpperOutlier = new double[m_AttributeIndices.length]; m_LowerOutlier = new double[m_AttributeIndices.length]; m_LowerExtremeValue = new double[m_AttributeIndices.length]; m_Median = new double[m_AttributeIndices.length]; m_IQR = new double[m_AttributeIndices.length]; for (i = 0; i < m_AttributeIndices.length; i++) { // non-numeric attribute? if (m_AttributeIndices[i] == NON_NUMERIC) continue; // sort attribute data values = instances.attributeToDoubleArray(m_AttributeIndices[i]); sortedIndices = Utils.sort(values); // determine indices half = sortedIndices.length / 2; quarter = half / 2; if (sortedIndices.length % 2 == 1) { q2 = values[sortedIndices[half]]; } else { q2 = (values[sortedIndices[half]] + values[sortedIndices[half + 1]]) / 2; } if (half % 2 == 1) { q1 = values[sortedIndices[quarter]]; q3 = values[sortedIndices[sortedIndices.length - quarter - 1]]; } else { q1 = (values[sortedIndices[quarter]] + values[sortedIndices[quarter + 1]]) / 2; q3 = (values[sortedIndices[sortedIndices.length - quarter - 1]] + values[sortedIndices[sortedIndices.length - quarter]]) / 2; } // determine thresholds and other values m_Median[i] = q2; m_IQR[i] = q3 - q1; m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i]; m_UpperOutlier[i] = q3 + getOutlierFactor() * m_IQR[i]; m_LowerOutlier[i] = q1 - getOutlierFactor() * m_IQR[i]; m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i]; } }
Example 16
Source File: BoxTidwell.java From tsml with GNU General Public License v3.0 | 4 votes |
public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers) { //1. Get values of the attribute of interest. //Confusingly, am working with attributes in rows not columns double[] temp=data.attributeToDoubleArray(pos); double[] originalData= new double[temp.length]; double[] logData= new double[temp.length]; for(int i=0;i<temp.length;i++) { originalData[i]=temp[i]; logData[i]=Math.log(temp[i]); } double[] y =data.attributeToDoubleArray(data.classIndex()); // I'm not sure if this is a memory copy or a reference copy, so be safe double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()]; double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()]; for(int j=0;j<data.numInstances();j++) { transposeFirst[0][j]=transposeSecond[0][j]=1; } for(int i=1;i<data.numAttributes();i++) { transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1); } // Add one to pos cos of the ones pos=pos+1; // Second has an attribute at the end of data for transform int workingPos=data.numAttributes(); LinearModel l1,l2; double alpha=1, b1,b2; double min=0.1; boolean finished=false; int count=0; final int MaxIterations=10; // Initialise alpha to 1 //Find Base SSE //While not termination condition while(!finished) { // System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha); //Create new attributes //1. Calculate x^alpha for(int j=0;j<originalData.length;j++) { transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha); } //2. Fit y=b1+ .. b_pos x^alpha (+ other terms)-> get b_pos l1=new LinearModel(transposeFirst,y); l1.fitModel(); //Not necessary: // l1.formTrainPredictions(); // l1.findTrainStatistics(); // System.out.println(l1+"\nVariance for L1 = "+l1.variance); b1=l1.paras[pos]; //3. Fit y=b*1+ .. b*_pos x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2 //2. 
Calculate x^alpha*log(x) for(int j=0;j<originalData.length;j++) transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j]; l2=new LinearModel(transposeSecond,y); l2.fitModel(); // Not necessary: // l2.formTrainPredictions(); // l2.findTrainStatistics(); // System.out.println(l2+"\nVariance for L2 = "+l2.variance); b2=l2.paras[workingPos]; alpha+=b2/b1; //Work out change term alpha = b*2/b1+alpha0 // System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2); //Update termination criteria: stop if small change: check notes count++; if(Math.abs(b2/b1)<min || count>=MaxIterations) finished=true; else if(Math.abs(alpha)>10) { alpha=1; finished=true; } } //Fix original powers[resultPos]=alpha; pos=pos-1; Instance inst; for(int i=0;i<data.numInstances();i++) { inst=data.instance(i); inst.setValue(pos,Math.pow(originalData[i],alpha)); } return data; }
Example 17
Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Calculates the n point precision result, which is the precision averaged * over n evenly spaced (w.r.t recall) samples of the curve. * * @param tcurve a previously extracted threshold curve Instances. * @param n the number of points to average over. * @return the n-point precision. */ public static double getNPointPrecision(Instances tcurve, int n) { if (!RELATION_NAME.equals(tcurve.relationName()) || (tcurve.numInstances() == 0)) { return Double.NaN; } int recallInd = tcurve.attribute(RECALL_NAME).index(); int precisInd = tcurve.attribute(PRECISION_NAME).index(); double [] recallVals = tcurve.attributeToDoubleArray(recallInd); int [] sorted = Utils.sort(recallVals); double isize = 1.0 / (n - 1); double psum = 0; for (int i = 0; i < n; i++) { int pos = binarySearch(sorted, recallVals, i * isize); double recall = recallVals[sorted[pos]]; double precis = tcurve.instance(sorted[pos]).value(precisInd); /* System.err.println("Point " + (i + 1) + ": i=" + pos + " r=" + (i * isize) + " p'=" + precis + " r'=" + recall); */ // interpolate figures for non-endpoints while ((pos != 0) && (pos < sorted.length - 1)) { pos++; double recall2 = recallVals[sorted[pos]]; if (recall2 != recall) { double precis2 = tcurve.instance(sorted[pos]).value(precisInd); double slope = (precis2 - precis) / (recall2 - recall); double offset = precis - recall * slope; precis = isize * i * slope + offset; /* System.err.println("Point2 " + (i + 1) + ": i=" + pos + " r=" + (i * isize) + " p'=" + precis2 + " r'=" + recall2 + " p''=" + precis); */ break; } } psum += precis; } return psum / n; }
Example 18
Source File: C45PruneableClassifierTreeG.java From tsml with GNU General Public License v3.0 | 3 votes |
/**
 * Returns the instance indices of a dataset ordered by the values of one attribute
 * (ascending, per the original description of this method).
 *
 * @param data the data the indices represent
 * @param a the index of the attribute to sort by
 * @return array of sorted indices, as produced by {@code Utils.sort}
 */
private int[] sortByAttribute(Instances data, int a) {
  return Utils.sort(data.attributeToDoubleArray(a));
}
Example 19
Source File: Ex01_Datahandling.java From tsml with GNU General Public License v3.0 | 2 votes |
public static void main(String[] args) throws Exception { // We'll be loading the ItalyPowerDemand dataset which is distributed with this codebase String basePath = "src/main/java/experiments/data/tsc/"; String dataset = "ItalyPowerDemand"; int seed = 1; Instances train; Instances test; Instances[] trainTest; ///////////// Loading method 1: loading individual files // DatasetLoading.loadData...(...) // For loading in a single arff without performing any kind of sampling. Class value is // assumed to be the last attribute train = DatasetLoading.loadDataThrowable(basePath + dataset + "/" + dataset + "_TRAIN.arff"); test = DatasetLoading.loadDataThrowable(basePath + dataset + "/" + dataset + "_TEST.arff"); // We could then resample these, while maintaining train/test distributions, using this trainTest = InstanceTools.resampleTrainAndTestInstances(train, test, 1); train = trainTest[0]; test = trainTest[1]; ///////////// Loading method 2: sampling directly // DatasetLoading.sampleDataset(...) // Wraps the data loading and sampling performed above. Read in a dataset either // from a single complete file (e.g. uci data) or a predefined split (e.g. ucr/tsc data) // and resamples it according to the seed given. If the resampled fold can already // be found in the read location ({dsetname}{foldid}_TRAIN and _TEST) then it will // load those. See the sampleDataset(...) javadoc trainTest = DatasetLoading.sampleDataset(basePath, dataset, seed); train = trainTest[0]; test = trainTest[1]; ///////////// Loading method 3: sampling the built in dataset // DatasetLoading.sampleDataset(...) 
// Because ItalyPowerDemand is distributed with the codebase, there's a wrapper // to sample it directly for quick testing trainTest = DatasetLoading.sampleItalyPowerDemand(seed); train = trainTest[0]; test = trainTest[1]; //////////// Data inspection and handling: // We can look at the basic meta info System.out.println("train.relationName() = " + train.relationName()); System.out.println("train.numInstances() = " + train.numInstances()); System.out.println("train.numAttributes() = " + train.numAttributes()); System.out.println("train.numClasses() = " + train.numClasses()); // And the individual instances for (Instance inst : train) System.out.print(inst.classValue() + ", "); System.out.println(""); // Often for speed we just want the data in a primitive array // We can go to and from them using this sort of procedure // Lets keeps the class labels separate in this example double[] classLabels = train.attributeToDoubleArray(train.classIndex()); // aka y_train boolean removeLastVal = true; double[][] data = InstanceTools.fromWekaInstancesArray(train, removeLastVal); // aka X_train // We can then do whatever fast array-optimised stuff, and shove it back into an instances object Instances reformedTrain = InstanceTools.toWekaInstances(data, classLabels); }