Java Code Examples for weka.filters.unsupervised.attribute.ReplaceMissingValues#setInputFormat()

The following examples show how to use weka.filters.unsupervised.attribute.ReplaceMissingValues#setInputFormat(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: LMT.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Trains the logistic model tree on the given data.
 *
 * <p>The data is first checked against the classifier's capabilities. Training
 * then runs on a separate {@code Instances} object from which instances with a
 * missing class are deleted, missing values are replaced and, if
 * {@code m_convertNominal} is set, nominal attributes are converted globally.
 *
 * @param data the data to train with
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // refuse data this classifier cannot handle
  getCapabilities().testWithFail(data);

  // drop instances that have no class value
  Instances trainingData = new Instances(data);
  trainingData.deleteWithMissingClass();

  // fill in missing values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(trainingData);
  trainingData = Filter.useFilter(trainingData, m_replaceMissing);

  // optional global nominal-to-binary conversion
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(trainingData);
    trainingData = Filter.useFilter(trainingData, m_nominalToBinary);
  }

  final int minInstancesForSplit = 2;

  // splits are chosen either on the residuals or on the class value
  ModelSelection selection = m_splitOnResiduals
    ? new ResidualModelSelection(minInstancesForSplit)
    : new C45ModelSelection(minInstancesForSplit, trainingData, true);

  // create the root node and grow the tree from it
  m_tree = new LMTNode(selection, m_numBoostingIterations, m_fastRegression,
                       m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta,
                       m_useAIC);
  m_tree.buildClassifier(trainingData);

  // only the C4.5 based selection needs an explicit cleanup
  if (selection instanceof C45ModelSelection) {
    C45ModelSelection c45Selection = (C45ModelSelection) selection;
    c45Selection.cleanup();
  }
}
 
Example 2
Source File: LeastMedSq.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Prepares the data for learning and stores the result in {@code m_Data}.
 *
 * <p>The data is passed through a {@code NominalToBinary} filter, then through
 * a {@code ReplaceMissingValues} filter, and finally instances with a missing
 * class are deleted. Both filters are kept in their member fields.
 *
 * @param data data to be cleaned up
 * @throws Exception if an error occurs
 */
private void cleanUpData(Instances data) throws Exception {

  m_Data = data;

  // step 1: nominal attributes -> binary attributes
  m_TransformFilter = new NominalToBinary();
  m_TransformFilter.setInputFormat(data);
  m_Data = Filter.useFilter(data, m_TransformFilter);

  // step 2: replace missing values in the transformed data
  m_MissingFilter = new ReplaceMissingValues();
  m_MissingFilter.setInputFormat(m_Data);
  m_Data = Filter.useFilter(m_Data, m_MissingFilter);

  // step 3: instances without a class value are removed
  m_Data.deleteWithMissingClass();
}
 
Example 3
Source File: ClassifierTools.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the missing values of a data set using a
 * {@code ReplaceMissingValues} filter.
 *
 * <p>All instances are pushed through the filter as a single batch, and every
 * instance the filter produces is collected into a new data set that has the
 * filter's output format. If anything goes wrong, the error is printed to
 * standard output and the JVM is terminated with a non-zero exit status.
 *
 * @param data the instances whose missing values are to be replaced
 * @return a new data set holding the filtered instances
 */
public static Instances estimateMissing(Instances data){

	ReplaceMissingValues nb = new ReplaceMissingValues();
	Instances nd=null;
	try{
		nb.setInputFormat(data);

		// feed the whole data set to the filter as one batch
		int n = data.numInstances();
		for(int i=0;i<n;i++)
			nb.input(data.instance(i));
		System.out.println(" Instances input");

		if(nb.batchFinished())
			System.out.println(" batch finished ");

		nd=nb.getOutputFormat();
		System.out.println(" Output format retrieved");

		// Drain the output queue until the filter reports it is empty instead
		// of assuming it holds exactly n instances: adding a null instance
		// would fail.
		Instance temp;
		while((temp=nb.output())!=null)
			nd.add(temp);
	}catch(Exception e)
	{
		System.out.println("Error in estimateMissing  = "+e.toString());
		// This is a failure, so do not report success (status 0) to the caller
		// of the JVM.
		System.exit(1);
	}
	return nd;

}
 
Example 4
Source File: MakeDensityBasedClusterer.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a clusterer for a set of instances.
 *
 * <p>Missing values are replaced first, then the wrapped clusterer is built on
 * the filtered data. Afterwards, per cluster and per attribute, either a
 * discrete estimator (nominal attributes) or a weighted mean and standard
 * deviation (numeric attributes) is computed from the instances assigned to
 * that cluster, together with normalized cluster priors.
 *
 * @param data the instances to train the clusterer with
 * @throws Exception if the clusterer hasn't been set or something goes wrong
 */  
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // replace missing values; the filter is kept in m_replaceMissing
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(data);
  data = weka.filters.Filter.useFilter(data, m_replaceMissing);

  // keep an Instances object built from the filtered data with capacity argument 0
  m_theInstances = new Instances(data, 0);
  // NOTE(review): this check only runs after the filter above has already been applied
  if (m_wrappedClusterer == null) {
    throw new Exception("No clusterer has been set");
  }
  m_wrappedClusterer.buildClusterer(data);
  // per cluster, per attribute: estimator for nominal attributes
  m_model = 
     new DiscreteEstimator[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
  // per cluster, per attribute: [0] = mean, [1] = standard deviation (numeric attributes)
  m_modelNormal = 
    new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()][2];
  // per cluster, per attribute: sum of instance weights that contributed a numeric value
  double[][] weights =  new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
  m_priors = new double[m_wrappedClusterer.numberOfClusters()]; 
   for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
     m_priors[i] = 1.0; // laplace correction
     for (int j = 0; j < data.numAttributes(); j++) {
// only nominal attributes get an estimator; other entries of m_model stay null
if (data.attribute(j).isNominal()) {
  m_model[i][j] = new DiscreteEstimator(data.attribute(j).numValues(),
				 true);
}
     }
   }
   
   Instance inst = null;

   // Compute mean, etc.
   // clusterIndex remembers the cluster of each instance for the std. dev. pass below
   int[] clusterIndex = new int[data.numInstances()];
   for (int i = 0; i < data.numInstances(); i++) {
     inst = data.instance(i);
     int cluster = m_wrappedClusterer.clusterInstance(inst);
     m_priors[cluster] += inst.weight();
     for (int j = 0; j < data.numAttributes(); j++) {
if (!inst.isMissing(j)) {
  if (data.attribute(j).isNominal()) {
    m_model[cluster][j].addValue(inst.value(j),inst.weight());
  } else {
    // accumulate the weighted sum; it is turned into a mean further down
    m_modelNormal[cluster][j][0] += inst.weight() * inst.value(j);
    weights[cluster][j] += inst.weight();
  }
}
     }
     clusterIndex[i] = cluster;
   }

   // turn the weighted sums into weighted means (left untouched where no weight was seen)
   for (int j = 0; j < data.numAttributes(); j++) {
     if (data.attribute(j).isNumeric()) {
for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {	   
  if (weights[i][j] > 0) {
    m_modelNormal[i][j][0] /= weights[i][j];
  }
}
     }
   }

   // Compute standard deviations
   // first accumulate the weighted squared deviations from the cluster mean
   for (int i = 0; i < data.numInstances(); i++) {
     inst = data.instance(i);
     for (int j = 0; j < data.numAttributes(); j++) {
if (!inst.isMissing(j)) {
  if (data.attribute(j).isNumeric()) {
    double diff = m_modelNormal[clusterIndex[i]][j][0] - inst.value(j);
    m_modelNormal[clusterIndex[i]][j][1] += inst.weight() * diff * diff;
  }
}
     }
   }

   // then normalize by the weight, take the square root and enforce the minimum
   for (int j = 0; j < data.numAttributes(); j++) {
     if (data.attribute(j).isNumeric()) {
for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {	   
  if (weights[i][j] > 0) {
    m_modelNormal[i][j][1] = 
      Math.sqrt(m_modelNormal[i][j][1] / weights[i][j]);
  // NOTE(review): this condition is the complement of the one above unless the weight is NaN
  } else if (weights[i][j] <= 0) {
    // no weight observed for this cluster/attribute pair
    m_modelNormal[i][j][1] = Double.MAX_VALUE;
  }
  // too small a deviation: fall back to the attribute's std. dev. over all of data,
  // and to m_minStdDev if that is still not larger than the minimum
  if (m_modelNormal[i][j][1] <= m_minStdDev) {
    m_modelNormal[i][j][1] = data.attributeStats(j).numericStats.stdDev;
    if (m_modelNormal[i][j][1] <= m_minStdDev) {
      m_modelNormal[i][j][1] = m_minStdDev;
    }
  }
}
     }
   }
   
   // priors hold 1.0 plus the summed instance weights per cluster; normalize them
   Utils.normalize(m_priors);
}
 
Example 5
Source File: FT.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Trains the functional tree on the given data.
 *
 * <p>After the capabilities check, a separate {@code Instances} object is
 * stripped of instances with a missing class, has its missing values replaced
 * and, if {@code m_convertNominal} is set, its nominal attributes converted.
 * The kind of root node is chosen by {@code m_modelType}; the tree is then
 * built, pruned, given IDs and cleaned up.
 *
 * @param data the data to train with
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // refuse data this classifier cannot handle
  getCapabilities().testWithFail(data);

  // drop instances that have no class value
  Instances trainingData = new Instances(data);
  trainingData.deleteWithMissingClass();

  // fill in missing values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(trainingData);
  trainingData = Filter.useFilter(trainingData, m_replaceMissing);

  // optional global nominal-to-binary conversion
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(trainingData);
    trainingData = Filter.useFilter(trainingData, m_nominalToBinary);
  }

  // pick the root node implementation: 0 = FT, 1 = FTLeaves, 2 = FTInner
  if (m_modelType == 0) {
    m_tree = new FTNode(m_errorOnProbabilities, m_numBoostingIterations,
                        m_minNumInstances, m_weightTrimBeta, m_useAIC);
  } else if (m_modelType == 1) {
    m_tree = new FTLeavesNode(m_errorOnProbabilities, m_numBoostingIterations,
                              m_minNumInstances, m_weightTrimBeta, m_useAIC);
  } else if (m_modelType == 2) {
    m_tree = new FTInnerNode(m_errorOnProbabilities, m_numBoostingIterations,
                             m_minNumInstances, m_weightTrimBeta, m_useAIC);
  }

  // grow the tree, then prune it, number its nodes and release resources
  m_tree.buildClassifier(trainingData);
  m_tree.prune();
  m_tree.assignIDs(0);
  m_tree.cleanup();
}
 
Example 6
Source File: OrbitModel.java    From orbit-image-analysis with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts a model that was created with an old weka version (&lt; 3.7.11).
 *
 * <p>A model is treated as old when reading the number of values of its class
 * attribute throws a {@code NullPointerException}. For such a model a fresh
 * structure is built (one numeric attribute per feature plus a nominal class
 * attribute with the values "1.0", "2.0", ...) and stored in the model. If
 * the wrapped classifier is an {@code SMO}, its private class attribute,
 * missing-values filter and the internal state of its filter are patched via
 * reflection. A failure while patching is logged and does not propagate.
 *
 * <p>The segmentation, secondary segmentation and exclusion models of
 * {@code model} are processed recursively.
 *
 * @param model the model to fix; {@code null} is accepted and ignored
 */
public static void fixOldModelVersion(final OrbitModel model) {
    if (model == null) return; // nothing to fix

    // The probe itself is the detection mechanism: a NullPointerException
    // raised here is taken as the sign of a model from an old weka version.
    boolean oldWekaVersion = false;
    try {
        model.getStructure().classAttribute().numValues();
    } catch (NullPointerException ne) {
        oldWekaVersion = true;
    }

    // apply old model fix?
    if (oldWekaVersion) {
        logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes");
        int numClasses = model.getClassShapes().size();
        TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null);
        // one attribute per feature value plus one for the class
        int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1;
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures);
        for (int a = 0; a < numFeatures - 1; a++) {
            attrInfo.add(new Attribute("a" + a));
        }
        List<String> classValues = new ArrayList<String>(numClasses);
        for (int i = 0; i < numClasses; i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        Attribute classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        Instances structure = new Instances("trainSet pattern classes", attrInfo, 0);
        structure.setClassIndex(numFeatures - 1);
        model.setStructure(structure);

        try {
            // instanceof is false for null, so no separate null check is needed
            if (model.getClassifier() != null && model.getClassifier().getClassifier() instanceof SMO) {
                SMO smo = ((SMO) model.getClassifier().getClassifier());
                Class<?> smoClass = smo.getClass();

                setDeclaredField(smoClass, smo, "m_classAttribute", classAttr);

                // missing values
                ReplaceMissingValues rmv = new ReplaceMissingValues();
                rmv.setInputFormat(structure);
                setDeclaredField(smoClass, smo, "m_Missing", rmv);

                // filter
                Field filter = smoClass.getDeclaredField("m_Filter");
                filter.setAccessible(true);
                Filter normalize = (Filter) filter.get(smo);

                RelationalLocator relLoc = new RelationalLocator(structure);
                StringLocator strLoc = new StringLocator(structure);

                // the patched fields are declared two levels up from the filter's own class
                Class<?> filterBase = normalize.getClass().getSuperclass().getSuperclass();
                setDeclaredField(filterBase, normalize, "m_OutputRelAtts", relLoc);
                setDeclaredField(filterBase, normalize, "m_InputRelAtts", relLoc);
                setDeclaredField(filterBase, normalize, "m_OutputStringAtts", strLoc);
                setDeclaredField(filterBase, normalize, "m_InputStringAtts", strLoc);
                setDeclaredField(filterBase, normalize, "m_OutputFormat", structure);

                logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now");
            } // else: good luck...
        } catch (Exception e) {
            // hand the exception to the logger so the stack trace ends up in the log
            logger.error("new weka version fixes could not be applied: " + e.getMessage(), e);
        }
    } // old weka version
    fixOldModelVersion(model.getSegmentationModel());     // fixOldModelVersion can handle null
    fixOldModelVersion(model.getSecondarySegmentationModel());  // fixOldModelVersion can handle null
    fixOldModelVersion(model.getExclusionModel());  // fixOldModelVersion can handle null
}

/**
 * Writes {@code value} into the field named {@code fieldName} that is declared
 * by {@code owner}, making the field accessible first.
 */
private static void setDeclaredField(Class<?> owner, Object target, String fieldName, Object value)
        throws NoSuchFieldException, IllegalAccessException {
    Field field = owner.getDeclaredField(fieldName);
    field.setAccessible(true);
    field.set(target, value);
}
 
Example 7
Source File: KddCup.java    From Machine-Learning-in-Java with MIT License 4 votes vote down vote up
/**
 * Runs the preprocessing pipeline on the data: removes useless attributes,
 * replaces missing values, discretizes numeric attributes and finally keeps
 * only the attributes selected by an information gain ranking.
 *
 * @param data the instances to preprocess
 * @return the preprocessed instances
 * @throws Exception if a filter or the attribute selection fails
 */
public static Instances preProcessData(Instances data) throws Exception{
	
	/* 
	 * Remove useless attributes
	 */
	RemoveUseless removeUseless = new RemoveUseless();
	removeUseless.setOptions(new String[] { "-M", "99" });	// threshold
	removeUseless.setInputFormat(data);
	data = Filter.useFilter(data, removeUseless);

	
	/* 
	 * Replace missing values
	 */
	ReplaceMissingValues fixMissing = new ReplaceMissingValues();
	fixMissing.setInputFormat(data);
	data = Filter.useFilter(data, fixMissing);
	

	/* 
	 * Discretize numeric attributes
	 */
	Discretize discretizeNumeric = new Discretize();
	discretizeNumeric.setOptions(new String[] {
			"-O",
			"-M",  "-1.0", 
			"-B",  "4",  // no of bins
			"-R",  "first-last"}); //range of attributes
	// Apply the discretize filter configured above. The original applied
	// fixMissing a second time here, so the data was never discretized.
	discretizeNumeric.setInputFormat(data);
	data = Filter.useFilter(data, discretizeNumeric);

	/* 
	 * Select only informative attributes
	 */
	InfoGainAttributeEval eval = new InfoGainAttributeEval();
	Ranker search = new Ranker();
	search.setOptions(new String[] { "-T", "0.001" });	// information gain threshold
	AttributeSelection attSelect = new AttributeSelection();
	attSelect.setEvaluator(eval);
	attSelect.setSearch(search);
	
	// apply attribute selection
	attSelect.SelectAttributes(data);
	
	// remove the attributes not selected in the last run
	data = attSelect.reduceDimensionality(data);

	return data;
}
 
Example 8
Source File: YATSI.java    From collective-classification-weka-package with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Sets up the container from the train and test instances.
 *
 * <p>Both sets are merged into one array that is sorted with
 * {@code m_Comparator}. The weight of every merged instance is recorded in
 * {@code m_Weights}, and the merged instances are turned into a training set
 * whose missing values are replaced.
 *
 * @param parent      the parent algorithm
 * @param train       the train instances
 * @param test        the test instances
 * @param setWeights  whether to set the weights for the training set 
 *                    (the processed instances)
 * @throws Exception  if something goes wrong
 */
public YATSIInstances(YATSI parent, Instances train, Instances test, 
                      boolean setWeights) 
  throws Exception {

  super();

  m_Parent = parent;

  // weight given to every test instance
  double testWeight = getParent().getNoWeights()
    ? 1.0
    : (double) train.numInstances()
        / (double) test.numInstances()
        * getParent().getWeightingFactor();

  // merge train and test into one array: train first, weighted test after it
  final int numTrain = train.numInstances();
  final int numTest  = test.numInstances();
  m_Unprocessed = new Instance[numTrain + numTest];
  for (int t = 0; t < numTrain; t++) {
    m_Unprocessed[t] = train.instance(t);
  }
  for (int t = 0; t < numTest; t++) {
    Instance unlabeled = test.instance(t);
    m_Unprocessed[numTrain + t] = unlabeled;
    unlabeled.setWeight(testWeight);
  }
  Arrays.sort(m_Unprocessed, m_Comparator);

  // remember the weights; reset them on the instances unless they are wanted
  final int total = m_Unprocessed.length;
  m_Weights = new double[total];
  for (int k = 0; k < total; k++) {
    Instance current = m_Unprocessed[k];
    m_Weights[k] = current.weight();
    if (!setWeights) {
      current.setWeight(1);
    }
  }

  // collect the sorted instances in a data set with the train header
  m_Trainset = new Instances(train, 0);
  for (Instance current : m_Unprocessed) {
    m_Trainset.add(current);
  }

  // replace the missing values of the collected instances
  m_Missing = new ReplaceMissingValues();
  m_Missing.setInputFormat(m_Trainset);
  m_Trainset = Filter.useFilter(m_Trainset, m_Missing);
}