weka.core.Instances#attributeToDoubleArray

Source File: ClusteringUtilities.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Computes the Rand index between a predicted clustering and the class
 * labels stored in a dataset.
 *
 * The index is the fraction of pairs of distinct instances on which the two
 * labellings agree: both put the pair in the same group, or both put it in
 * different groups. An instance is never paired with itself, since such a
 * pair always counts as an agreement and would inflate the score.
 *
 * @param predicted cluster assignment of each instance, in dataset order
 * @param inst dataset whose class attribute holds the reference labels
 * @return the Rand index in [0, 1]; 1.0 for a single instance (no pairs to
 *         disagree on) and NaN for an empty dataset
 * @throws IllegalArgumentException if predicted and inst differ in length
 */
public static double randIndex(int[] predicted, Instances inst){
    double[] actual = inst.attributeToDoubleArray(inst.classIndex());

    if (predicted.length != actual.length){
        throw new IllegalArgumentException("predicted has " + predicted.length
                + " labels but inst has " + actual.length + " instances");
    }

    // A single instance forms no pairs; keep reporting perfect agreement for it.
    if (predicted.length == 1){
        return 1.0;
    }

    double agreements = 0, pairs = 0;

    for (int i = 0; i < predicted.length; i++){
        // Start at i + 1 so each unordered pair is visited once and i is never paired with itself.
        for (int n = i + 1; n < predicted.length; n++){
            boolean sameCluster = predicted[i] == predicted[n];
            boolean sameClass = actual[i] == actual[n];
            if (sameCluster == sameClass){
                agreements++;
            }
            pairs++;
        }
    }

    // 0/0 for an empty dataset yields NaN.
    return agreements / pairs;
}

Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Locates the row of a threshold curve whose value in the last attribute
 * (the threshold) lies closest to the requested target.
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance whose threshold is closest to the
 * target, or -1 when the relation name does not match, the curve is
 * empty, or the target lies below 0 or above 1
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {

  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }
  if (tcurve.numInstances() == 0) {
    return -1;
  }
  if ((threshold < 0) || (threshold > 1.0)) {
    return -1;
  }

  // a single row is trivially the closest one
  if (tcurve.numInstances() == 1) {
    return 0;
  }

  final int thresholdColumn = tcurve.numAttributes() - 1;
  final double[] thresholds = tcurve.attributeToDoubleArray(thresholdColumn);
  final int[] order = Utils.sort(thresholds);
  return binarySearch(order, thresholds, threshold);
}

Source File: MultiLinearRegression.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Trains one LinearRegression per class value. For class c the regressor is
 * fitted to a numeric target that is 1 for instances of class c and 0 for
 * all other instances.
 *
 * @param data the training data; its last attribute is replaced by the
 *             numeric target in an internal copy
 * @throws Exception if building one of the regressors fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Copy the data and swap the last attribute for a numeric target attribute.
    numericClassInsts = new Instances(data);
    // temporary: presumably moves the class marker so the last attribute can be deleted
    numericClassInsts.setClassIndex(0);
    numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes() - 1);
    Attribute numericTarget = new Attribute("newClassVal");
    numericClassInsts.insertAttributeAt(numericTarget, numericClassInsts.numAttributes());
    numericClassInsts.setClassIndex(numericClassInsts.numAttributes() - 1);

    // One regressor per class, each trained on its own 0/1 relabelling.
    final int classCount = data.numClasses();
    regressors = new LinearRegression[classCount];
    final double[] trueClassVals = data.attributeToDoubleArray(data.classIndex());

    for (int c = 0; c < classCount; c++) {
        for (int i = 0; i < numericClassInsts.numInstances(); i++) {
            double indicator;
            if (trueClassVals[i] == c) {
                indicator = 1;
            } else {
                indicator = 0;
            }
            numericClassInsts.instance(i).setClassValue(indicator);
        }

        LinearRegression model = new LinearRegression();
        regressors[c] = model;
        model.buildClassifier(numericClassInsts);
    }
}

Source File: MultiResponseModelTrees.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Trains one M5P model tree per class value. For class c the tree is fitted
 * to a numeric target that is 1 for instances of class c and 0 for all
 * other instances.
 *
 * @param data the training data; its last attribute is replaced by the
 *             numeric target in an internal copy
 * @throws Exception if building one of the model trees fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Copy the data and swap the last attribute for a numeric target attribute.
    numericClassInsts = new Instances(data);
    // temporary: presumably moves the class marker so the last attribute can be deleted
    numericClassInsts.setClassIndex(0);
    numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes() - 1);
    Attribute numericTarget = new Attribute("newClassVal");
    numericClassInsts.insertAttributeAt(numericTarget, numericClassInsts.numAttributes());
    numericClassInsts.setClassIndex(numericClassInsts.numAttributes() - 1);

    // One model tree per class, each trained on its own 0/1 relabelling.
    final int classCount = data.numClasses();
    regressors = new M5P[classCount];
    final double[] trueClassVals = data.attributeToDoubleArray(data.classIndex());

    for (int c = 0; c < classCount; c++) {
        for (int i = 0; i < numericClassInsts.numInstances(); i++) {
            double indicator;
            if (trueClassVals[i] == c) {
                indicator = 1;
            } else {
                indicator = 0;
            }
            numericClassInsts.instance(i).setClassValue(indicator);
        }

        M5P tree = new M5P();
        regressors[c] = tree;
        tree.buildClassifier(numericClassInsts);
    }
}

Source File: Reciprocal.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Replaces the last attribute of every instance with its reciprocal,
 * 1 / (value + offSet), modifying the given dataset in place.
 *
 * The offset is chosen from the smallest response value: when that minimum
 * is at or below zeroOffset the offset becomes zeroOffset - min, otherwise
 * it is 0. Progress is printed to standard output.
 *
 * @param data the dataset to transform; the response is taken to be the
 *             last attribute
 * @return the same dataset object, with the response values replaced
 */
public Instances transform(Instances data){
    // Not ideal: the response position should come from a method rather than be assumed last.
    final int responsePos = data.numAttributes() - 1;
    final double[] response = data.attributeToDoubleArray(responsePos);

    // Smallest response value.
    double min = response[0];
    for (double value : response) {
        if (value < min) {
            min = value;
        }
    }

    // Shift the response when its minimum does not exceed zeroOffset.
    offSet = (min <= zeroOffset) ? (-min + zeroOffset) : 0;
    System.out.println(" Min value = "+min+" offset = "+offSet);

    final int rowCount = data.numInstances();
    for (int row = 0; row < rowCount; row++) {
        Instance current = data.instance(row);
        double before = current.value(responsePos);
        System.out.print(row+" "+before);
        double after = 1/(before+offSet);
        System.out.println(" "+after);
        current.setValue(responsePos, after);
    }
    return data;
}

Source File: MatrixUtils.java From meka with GNU General Public License v3.0

5 votes

/**
 * Helper method that transforms an Instances object to a Matrix object.
 * Row j of the matrix holds instance j and column i holds attribute i.
 *
 * @param inst The Instances to transform.
 * @return  The resulting Matrix object.
 */
public static Matrix instancesToMatrix(Instances inst){
	final int numRows = inst.numInstances();
	final int numCols = inst.numAttributes();
	double[][] darr = new double[numRows][numCols];
	for (int i = 0; i < numCols; i++) {
		// Extract each attribute column once instead of once per cell.
		double[] column = inst.attributeToDoubleArray(i);
		for (int j = 0; j < column.length; j++) {
			darr[j][i] = column[j];
		}
	}
	return new Matrix(darr);
}

Source File: LinearModel.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Computes predictions for a test set from the fitted parameters and stores
 * them in the predicted field.
 *
 * The design matrix has a leading column of ones (the constant term)
 * followed by attributes 0 .. numAttributes-2 of the test data.
 *
 * NOTE(review): when the attribute count differs from the training count m
 * this prints a message and terminates the JVM via System.exit(0).
 *
 * @param testData the instances to predict
 * @return the predicted field, holding one prediction per test instance
 */
public double[] formTestPredictions(Instances testData)
	{
		final int numRows = testData.numInstances();
		final int numCols = testData.numAttributes();	// counts the constant term in place of the last attribute
		predicted = new double[numRows];
		if (numCols != m)
		{
			System.out.println("Error: Mismatch in attribute lengths in form test Train ="+m+" Test ="+numCols);
			System.exit(0);
		}

		// Build the transposed design matrix: one row per term, one column per instance.
		double[][] terms = new double[numCols][numRows];
		for (int col = 0; col < numRows; col++)
		{
			terms[0][col] = 1;
		}
		for (int att = 0; att + 1 < numCols; att++)
		{
			terms[att + 1] = testData.attributeToDoubleArray(att);
		}
		Matrix testX = new Matrix(terms).transpose();

		// prediction = intercept + sum of coefficient * term value
		for (int row = 0; row < numRows; row++)
		{
			predicted[row] = paras[0];
			for (int term = 1; term < paras.length; term++)
			{
				predicted[row] += paras[term] * testX.get(row, term);
			}
		}
		return predicted;
	}

Source File: LinearModel.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the response vector and design matrix for a linear model from a
 * dataset.
 *
 * The response is the class attribute. The design matrix has a leading
 * column of ones (the constant term) followed by attributes
 * 0 .. numAttributes-2, so it has numAttributes columns in total.
 *
 * @param data the training instances
 */
public LinearModel(Instances data)
	{
		n = data.numInstances();
		m = data.numAttributes();	// number of model terms, constant included

		// Response vector, also wrapped as a matrix.
		y = data.attributeToDoubleArray(data.classIndex());
		Y = new Matrix(y, y.length);

		// Transposed design matrix: row 0 is the constant term, row k is attribute k-1.
		double[][] terms = new double[m][n];
		for (int col = 0; col < n; col++)
		{
			terms[0][col] = 1;
		}
		for (int att = 0; att + 1 < m; att++)
		{
			terms[att + 1] = data.attributeToDoubleArray(att);
		}
		Xt = new Matrix(terms);
		X = Xt.transpose();
	}

Source File: YeoJohnson.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Placeholder transform: prints a notice and returns the data untouched.
 *
 * @param data the dataset passed in
 * @return the same dataset object, unmodified
 */
@Override
public Instances transform(Instances data)
{
	System.out.println(" Doesnt do anything! ");
	return data;
}

Source File: YeoJohnson.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Searches the lambda grid MIN, MIN+INTERVAL, ... up to MAX for the value
 * whose transformed response gives the smallest Kolmogorov-Smirnov
 * statistic on the standardised residuals of a fitted LinearModel.
 *
 * For each lambda the response is transformed via transformResponse from a
 * private copy of the original response values, and a new LinearModel is
 * fitted to the data.
 *
 * @param data the dataset; its response is rewritten by transformResponse
 * @param pos index in power at which the best lambda is stored
 * @param power output array receiving the best lambda at position pos
 * @return the smallest error found
 */
static public double findBestTransform(Instances data, int pos, double[] power)
	{
		final int responsePos = data.classIndex();
		// Private copy of the untransformed response values.
		final double[] response = data.attributeToDoubleArray(responsePos).clone();

		double bestLambda = MIN;
		double minError = Double.MAX_VALUE;

		double lambda = MIN;
		while (lambda <= MAX)
		{
			// Transform the response, then refit and score the model.
			transformResponse(data, lambda, response);
			LinearModel lm = new LinearModel(data);
			lm.fitModel();
			lm.formTrainPredictions();
			lm.findTrainStatistics();

			// The K-S statistic is the selection criterion.
			double error = ResidualTests.kolmogorovSmirnoff(lm.stdResidual);
			// Result is not used for selection; call kept as in the original.
			ResidualTests.testHeteroscadisity(lm.y, lm.predicted);

			if (error < minError)
			{
				bestLambda = lambda;
				minError = error;
			}
			lambda += INTERVAL;
		}
		power[pos] = bestLambda;
		return minError;
	}

Source File: YeoJohnson.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Rewrites the class attribute of every instance in place using a
 * piecewise power/log formula controlled by lambda.
 *
 * Negative values map to -((1 - y)^(2 - lambda) - 1) / (2 - lambda), or
 * -log(1 - y) when lambda is 2. Other values map to
 * ((y + 1)^lambda - 1) / lambda, or log(1 + y) when lambda is 0.
 *
 * NOTE(review): these look like the forward Yeo-Johnson formulas rather
 * than their inverse, despite the method name - confirm against callers.
 *
 * @param data the dataset whose class attribute is rewritten
 * @param lambda the power parameter
 * @return the same dataset object, with the class attribute replaced
 */
static public Instances invertResponse(Instances data, double lambda){
	final int responsePos = data.classIndex();
	final double[] response = data.attributeToDoubleArray(responsePos);

	for (int row = 0; row < response.length; row++)
	{
		final Instance current = data.instance(row);
		final double y = response[row];
		final double mapped;

		if (y < 0)
		{
			mapped = (lambda != 2)
				? -(Math.pow((1 - y), 2 - lambda) - 1) / (2 - lambda)
				: -Math.log(1 - y);
		}
		else
		{
			mapped = (lambda == 0)
				? Math.log(1 + y)
				: (Math.pow(y + 1, lambda) - 1) / lambda;
		}
		current.setValue(responsePos, mapped);
	}

	return data;
}

Source File: YeoJohnson.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Replaces the last attribute of every instance with the value produced by
 * invert(bestLambda, values) for that column, modifying the data in place.
 *
 * @param data the dataset whose last attribute is rewritten
 * @return the same dataset object, with the last attribute replaced
 */
@Override
public Instances invert(Instances data){
	final int responsePos = data.numAttributes() - 1;
	final double[] current = data.attributeToDoubleArray(responsePos);
	final double[] newVals = invert(bestLambda, current);

	for (int row = 0; row < data.numInstances(); row++)
	{
		data.instance(row).setValue(responsePos, newVals[row]);
	}
	return data;
}

Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Calculates the area under the ROC curve as the Wilcoxon-Mann-Whitney statistic.
  *
  * The true-positive and false-positive counts of consecutive rows are
  * differenced to obtain the positives and negatives at each step; the
  * last row contributes its own counts. The accumulated area is normalised
  * by the counts found in the first row.
  *
  * @param tcurve a previously extracted threshold curve Instances.
  * @return the ROC area, or Double.NaN if you don't pass in
  * a ThresholdCurve generated Instances or the curve is empty.
  */
 public static double getROCArea(Instances tcurve) {

   final int n = tcurve.numInstances();
   if (!RELATION_NAME.equals(tcurve.relationName())) {
     return Double.NaN;
   }
   if (n == 0) {
     return Double.NaN;
   }

   final int tpInd = tcurve.attribute(TRUE_POS_NAME).index();
   final int fpInd = tcurve.attribute(FALSE_POS_NAME).index();
   final double[] tpVals = tcurve.attributeToDoubleArray(tpInd);
   final double[] fpVals = tcurve.attributeToDoubleArray(fpInd);

   // totals are read from the first row of the curve
   final double totalPos = tpVals[0];
   final double totalNeg = fpVals[0];

   double area = 0.0;
   double cumNeg = 0.0;

   // every row but the last: counts are differences to the following row
   for (int i = 0; i + 1 < n; i++) {
     final double stepPos = tpVals[i] - tpVals[i + 1];
     final double stepNeg = fpVals[i] - fpVals[i + 1];
     area += stepPos * (cumNeg + (0.5 * stepNeg));
     cumNeg += stepNeg;
   }

   // last row: its own counts are used directly
   final double lastPos = tpVals[n - 1];
   final double lastNeg = fpVals[n - 1];
   area += lastPos * (cumNeg + (0.5 * lastNeg));

   return area / (totalNeg * totalPos);
 }

Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Calculates the area under the precision-recall curve (AUPRC) by summing
 * precision times the change in recall, walking the rows from the
 * second-to-last one back to the first.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @return the PRC area, Double.NaN if you don't pass in a ThresholdCurve
 * generated Instances or the curve is empty, or a missing value when the
 * accumulated area is exactly 0.
 */
public static double getPRCArea(Instances tcurve) {
  final int n = tcurve.numInstances();
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return Double.NaN;
  }
  if (n == 0) {
    return Double.NaN;
  }

  final int pInd = tcurve.attribute(PRECISION_NAME).index();
  final int rInd = tcurve.attribute(RECALL_NAME).index();
  final double[] pVals = tcurve.attributeToDoubleArray(pInd);
  final double[] rVals = tcurve.attributeToDoubleArray(rInd);

  double area = 0;
  double previousRecall = rVals[n - 1];

  // begin at the first real p/r pair (the last row is the artificial zero point)
  int i = n - 2;
  while (i >= 0) {
    area += (pVals[i] * (rVals[i] - previousRecall));
    previousRecall = rVals[i];
    i--;
  }

  return (area == 0) ? Utils.missingValue() : area;
}

Source File: InterquartileRange.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Computes, for every selected numeric attribute, the median, the
  * interquartile range and the thresholds for outliers and extreme values.
  *
  * Quartiles use the median-of-halves rule: q2 is the median of all values,
  * q1 the median of the lowest floor(n/2) values and q3 the median of the
  * highest floor(n/2) values. With a single value all three quartiles equal
  * that value; with no values every statistic is NaN.
  * 
  * @param instances	the data to work on
  */
 protected void computeThresholds(Instances instances) {
   final int numAtts = m_AttributeIndices.length;

   m_UpperExtremeValue = new double[numAtts];
   m_UpperOutlier      = new double[numAtts];
   m_LowerOutlier      = new double[numAtts];
   m_LowerExtremeValue = new double[numAtts];
   m_Median            = new double[numAtts];
   m_IQR               = new double[numAtts];
   
   for (int i = 0; i < numAtts; i++) {
     // non-numeric attribute?
     if (m_AttributeIndices[i] == NON_NUMERIC)
       continue;
     
     // sort attribute data
     double[] values      = instances.attributeToDoubleArray(m_AttributeIndices[i]);
     int[] sortedIndices  = Utils.sort(values);
     int count            = sortedIndices.length;
     int half             = count / 2;
     
     double q1;
     double q2;
     double q3;
     if (count == 0) {
       // nothing to summarise
       q1 = q2 = q3 = Double.NaN;
     }
     else {
       q2 = medianOfSortedRange(values, sortedIndices, 0, count);
       if (half == 0) {
         // a single value: both halves are empty
         q1 = q2;
         q3 = q2;
       }
       else {
         // lower and upper halves exclude the middle element when count is odd
         q1 = medianOfSortedRange(values, sortedIndices, 0, half);
         q3 = medianOfSortedRange(values, sortedIndices, count - half, count);
       }
     }
     
     // determine thresholds and other values
     m_Median[i]            = q2;
     m_IQR[i]               = q3 - q1;
     m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
     m_UpperOutlier[i]      = q3 + getOutlierFactor()       * m_IQR[i];
     m_LowerOutlier[i]      = q1 - getOutlierFactor()       * m_IQR[i];
     m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
   }
 }

 /**
  * Returns the median of the values at sorted positions from (inclusive)
  * to to (exclusive); the range must not be empty.
  *
  * @param values		the unsorted values
  * @param sortedIndices	indices into values in ascending order of value
  * @param from		first sorted position of the range
  * @param to			sorted position just past the end of the range
  * @return			the middle value, or the mean of the two middle values
  */
 private static double medianOfSortedRange(double[] values, int[] sortedIndices, int from, int to) {
   int length = to - from;
   int mid    = from + length / 2;
   if (length % 2 == 1) {
     return values[sortedIndices[mid]];
   }
   return (values[sortedIndices[mid - 1]] + values[sortedIndices[mid]]) / 2;
 }

Source File: BoxTidwell.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Iteratively estimates a power alpha for one regressor and replaces that
 * attribute's values with value^alpha, modifying the dataset in place.
 *
 * Each iteration fits two LinearModel instances: one on the design matrix
 * with the regressor raised to the current alpha, and one on the same
 * matrix with an extra x^alpha*log(x) term. alpha is then moved by b2/b1,
 * where b1 and b2 are read from the paras arrays of the two models
 * (presumably the fitted coefficients - confirm in LinearModel).
 *
 * The loop stops when |b2/b1| drops below 0.1 or after 10 iterations; in
 * either case the alpha returned already includes the last step. If neither
 * holds and |alpha| exceeds 10, alpha is reset to 1 and the loop stops.
 *
 * NOTE(review): there is no guard for non-positive attribute values
 * (Math.log) or for b1 being zero.
 *
 * @param data the dataset; attribute pos is overwritten
 * @param pos index of the regressor to transform; pos+1 is used as a row
 *            index into a design matrix with numAttributes rows
 * @param resultPos index in powers at which the final alpha is stored
 * @param powers output array receiving the final alpha
 * @return the same dataset object, with attribute pos replaced
 */
public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers)
	{

//1. Get values of the attribute of interest. 
		
//Confusingly, am working with attributes in rows not columns		
		double[] temp=data.attributeToDoubleArray(pos);
		double[] originalData= new double[temp.length];
		double[] logData= new double[temp.length];
		
		// Keep a private copy of the raw values and their natural logs.
		for(int i=0;i<temp.length;i++)
		{
			originalData[i]=temp[i];
			logData[i]=Math.log(temp[i]);	
		}
		// Response values come from the class attribute.
		double[] y =data.attributeToDoubleArray(data.classIndex()); 
//		I'm not sure if this is a memory copy or a reference copy, so be safe
		// transposeFirst: one row per term; transposeSecond: the same plus one extra row.
		double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()];
		double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()];
		// Row 0 is the constant term.
		for(int j=0;j<data.numInstances();j++)
		{
			transposeFirst[0][j]=transposeSecond[0][j]=1;
		}
		// Rows 1..numAttributes-1 hold attributes 0..numAttributes-2; both
		// matrices are given the SAME array object for each of these rows.
		for(int i=1;i<data.numAttributes();i++)
		{
			transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1);
		}
//		Add one to pos cos of the ones
		pos=pos+1;
//		Second has an attribute at the end of data for transform
		int workingPos=data.numAttributes();
		LinearModel l1,l2;
		double alpha=1, b1,b2;
		// Convergence tolerance on |b2/b1|.
		double min=0.1;
		boolean finished=false;
		int count=0;
		final int MaxIterations=10;
		//		Initialise alpha to 1
//Find Base SSE		
		//While not termination condition
		while(!finished)
		{
//			System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha);
			//Create new attributes
			//1. Calculate x^alpha
			for(int j=0;j<originalData.length;j++)
			{
				transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha);
			}

			//2. Fit y=b1+ .. b_pos	x^alpha (+ other terms)-> get b_pos
			l1=new LinearModel(transposeFirst,y);	
			l1.fitModel();
			
//Not necessary: 
//			l1.formTrainPredictions();
//			l1.findTrainStatistics();
//			System.out.println(l1+"\nVariance for L1 = "+l1.variance);
			
			b1=l1.paras[pos];
			//3. Fit y=b*1+ .. b*_pos	x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2
			//2. Calculate x^alpha*log(x)
			for(int j=0;j<originalData.length;j++)
				transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j];
			l2=new LinearModel(transposeSecond,y);	
			l2.fitModel();
			
//			Not necessary: 
//			l2.formTrainPredictions();
//			l2.findTrainStatistics();
//			System.out.println(l2+"\nVariance for L2 = "+l2.variance);
			
			b2=l2.paras[workingPos];
			
			// alpha is updated before the termination test below.
			alpha+=b2/b1;
			//Work out change term alpha = b*2/b1+alpha0
//			System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2);
			//Update termination criteria: stop if small change: check notes
			count++;
			if(Math.abs(b2/b1)<min || count>=MaxIterations)
				finished=true;
			else if(Math.abs(alpha)>10)
			{
				// Estimate has run away: fall back to no transformation.
				alpha=1;
				finished=true;
			}
		}
//Fix original 
		powers[resultPos]=alpha;
		// Undo the +1 shift so pos indexes the dataset attribute again.
		pos=pos-1;
		Instance inst;
		for(int i=0;i<data.numInstances();i++)
		{
			inst=data.instance(i);
			inst.setValue(pos,Math.pow(originalData[i],alpha));
		}
		return data;
	}

Source File: ThresholdCurve.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Calculates the n point precision result, which is the precision averaged
 * over n evenly spaced (w.r.t recall) samples of the curve.
 *
 * For each target recall the nearest curve point is located with
 * binarySearch. Unless that point is the first or last in recall order,
 * precision is linearly interpolated between it and the next point with a
 * different recall value.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @param n the number of points to average over.
 * @return the n-point precision, or Double.NaN if the relation name does
 * not match or the curve is empty.
 */
public static double getNPointPrecision(Instances tcurve, int n) {

  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return Double.NaN;
  }
  if (tcurve.numInstances() == 0) {
    return Double.NaN;
  }

  final int recallInd = tcurve.attribute(RECALL_NAME).index();
  final int precisInd = tcurve.attribute(PRECISION_NAME).index();
  final double[] recallVals = tcurve.attributeToDoubleArray(recallInd);
  final int[] order = Utils.sort(recallVals);
  final int lastPos = order.length - 1;

  // spacing between consecutive target recall values
  final double step = 1.0 / (n - 1);

  double total = 0;
  for (int i = 0; i < n; i++) {
    final double target = i * step;
    int pos = binarySearch(order, recallVals, target);
    final double recall = recallVals[order[pos]];
    double precis = tcurve.instance(order[pos]).value(precisInd);

    // interpolate figures for non-endpoints
    if (pos != 0) {
      while (pos < lastPos) {
        pos++;
        final double nextRecall = recallVals[order[pos]];
        if (nextRecall == recall) {
          // same recall: keep looking for a distinct neighbour
          continue;
        }
        final double nextPrecis = tcurve.instance(order[pos]).value(precisInd);
        final double slope = (nextPrecis - precis) / (nextRecall - recall);
        final double offset = precis - recall * slope;
        precis = target * slope + offset;
        break;
      }
    }
    total += precis;
  }
  return total / n;
}

Source File: C45PruneableClassifierTreeG.java From tsml with GNU General Public License v3.0

3 votes

/**
 * Returns the instance indices of a dataset ordered by ascending value of
 * one attribute.
 *
 * @param data the data the indices represent
 * @param a the index of the attribute to sort by
 * @return array of sorted indices
 */
private int [] sortByAttribute(Instances data, int a) {
  return Utils.sort(data.attributeToDoubleArray(a));
}

Source File: Ex01_Datahandling.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Walkthrough of the data-handling basics: three ways of loading the
 * ItalyPowerDemand train/test split, inspecting the loaded instances, and
 * converting between Instances objects and primitive arrays.
 *
 * @param args unused
 * @throws Exception if loading or sampling the data fails
 */
public static void main(String[] args) throws Exception {

    // ItalyPowerDemand is distributed with this codebase.
    final String dataRoot = "src/main/java/experiments/data/tsc/";
    final String problem = "ItalyPowerDemand";
    final int seed = 1;

    Instances train;
    Instances test;
    Instances[] split;

    // ---- Loading method 1: individual files -------------------------------
    // DatasetLoading.loadData...(...) reads a single arff with no sampling.
    // The class value is assumed to be the last attribute.
    final String filePrefix = dataRoot + problem + "/" + problem;
    train = DatasetLoading.loadDataThrowable(filePrefix + "_TRAIN.arff");
    test = DatasetLoading.loadDataThrowable(filePrefix + "_TEST.arff");

    // The loaded pair can then be resampled while keeping the train/test distributions.
    split = InstanceTools.resampleTrainAndTestInstances(train, test, 1);
    train = split[0];
    test = split[1];

    // ---- Loading method 2: sampling directly ------------------------------
    // DatasetLoading.sampleDataset(...) wraps the loading and sampling above.
    // It reads either a single complete file (e.g. uci data) or a predefined
    // split (e.g. ucr/tsc data) and resamples it with the given seed. If the
    // resampled fold already exists in the read location
    // ({dsetname}{foldid}_TRAIN and _TEST) that fold is loaded instead.
    // See the sampleDataset(...) javadoc.
    split = DatasetLoading.sampleDataset(dataRoot, problem, seed);
    train = split[0];
    test = split[1];

    // ---- Loading method 3: the built-in dataset ---------------------------
    // ItalyPowerDemand ships with the codebase, so there is a wrapper that
    // samples it directly for quick testing.
    split = DatasetLoading.sampleItalyPowerDemand(seed);
    train = split[0];
    test = split[1];

    // ---- Data inspection and handling -------------------------------------
    // Basic meta information.
    System.out.println("train.relationName() = " + train.relationName());
    System.out.println("train.numInstances() = " + train.numInstances());
    System.out.println("train.numAttributes() = " + train.numAttributes());
    System.out.println("train.numClasses() = " + train.numClasses());

    // The individual instances.
    for (Instance inst : train) {
        System.out.print(inst.classValue() + ", ");
    }
    System.out.println("");

    // For speed the data is often wanted in primitive arrays; this is one
    // way to go to and from them.

    // Class labels are kept separate in this example (aka y_train).
    double[] classLabels = train.attributeToDoubleArray(train.classIndex());

    // Attribute values without the last column (aka X_train).
    boolean removeLastVal = true;
    double[][] data = InstanceTools.fromWekaInstancesArray(train, removeLastVal);

    // After any fast array-based processing, rebuild an Instances object.
    Instances reformedTrain = InstanceTools.toWekaInstances(data, classLabels);
}

Java Code Examples for weka.core.Instances#attributeToDoubleArray()