weka.filters.unsupervised.attribute.ReplaceMissingValues Java Examples
The following examples show how to use
weka.filters.unsupervised.attribute.ReplaceMissingValues.
You can vote up the examples you find useful or vote down the ones you don't,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example #1
Source File: CollectiveIBk.java From collective-classification-weka-package with GNU General Public License v3.0 | 6 votes |
/**
 * Initializes all member variables to their default values and
 * configures the underlying IBk base classifier.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // reset bookkeeping state
  m_KNNdetermined    = -1;
  m_NeighborsTestset = null;
  m_LabeledTestset   = null;
  m_TrainsetNew      = null;
  m_TestsetNew       = null;
  m_UseNaiveSearch   = false;

  // pre-processing filter for missing attribute values
  m_Missing = new ReplaceMissingValues();

  // base classifier: IBk with cross-validated neighborhood size
  m_Classifier = new IBk();
  m_Classifier.setKNN(10);
  m_Classifier.setCrossValidate(true);
  m_Classifier.setWindowSize(0);
  m_Classifier.setMeanSquared(false);
  m_KNN = m_Classifier.getKNN();

  m_AdditionalMeasures.add("measureDeterminedKNN");
}
Example #2
Source File: LMT.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Builds the classifier. * * @param data the data to train with * @throws Exception if classifier can't be built successfully */ public void buildClassifier(Instances data) throws Exception{ // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class Instances filteredData = new Instances(data); filteredData.deleteWithMissingClass(); //replace missing values m_replaceMissing = new ReplaceMissingValues(); m_replaceMissing.setInputFormat(filteredData); filteredData = Filter.useFilter(filteredData, m_replaceMissing); //possibly convert nominal attributes globally if (m_convertNominal) { m_nominalToBinary = new NominalToBinary(); m_nominalToBinary.setInputFormat(filteredData); filteredData = Filter.useFilter(filteredData, m_nominalToBinary); } int minNumInstances = 2; //create ModelSelection object, either for splits on the residuals or for splits on the class value ModelSelection modSelection; if (m_splitOnResiduals) { modSelection = new ResidualModelSelection(minNumInstances); } else { modSelection = new C45ModelSelection(minNumInstances, filteredData, true); } //create tree root m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression, m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC); //build tree m_tree.buildClassifier(filteredData); if (modSelection instanceof C45ModelSelection) ((C45ModelSelection)modSelection).cleanup(); }
Example #3
Source File: LeastMedSq.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Prepares the training data: binarizes nominal attributes, imputes
 * missing attribute values and removes instances without a class value.
 *
 * @param data data to be cleaned up
 * @throws Exception if an error occurs
 */
private void cleanUpData(Instances data) throws Exception {
  m_Data = data;

  // step 1: turn nominal attributes into binary indicator attributes
  m_TransformFilter = new NominalToBinary();
  m_TransformFilter.setInputFormat(m_Data);
  m_Data = Filter.useFilter(m_Data, m_TransformFilter);

  // step 2: fill in missing attribute values
  m_MissingFilter = new ReplaceMissingValues();
  m_MissingFilter.setInputFormat(m_Data);
  m_Data = Filter.useFilter(m_Data, m_MissingFilter);

  // step 3: instances with a missing class carry no target information
  m_Data.deleteWithMissingClass();
}
Example #4
Source File: PLSFilter.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Default constructor: wires up the pre-processing chain
 * (missing-value imputation followed by centering).
 */
public PLSFilter() {
  super();

  // pre-processing filters applied before the PLS transformation
  m_Missing = new ReplaceMissingValues();
  m_Filter  = new Center();
}
Example #5
Source File: ClassifierTools.java From tsml with GNU General Public License v3.0 | 5 votes |
public static Instances estimateMissing(Instances data){ ReplaceMissingValues nb = new ReplaceMissingValues(); Instances nd=null; try{ nb.setInputFormat(data); Instance temp; int n = data.numInstances(); for(int i=0;i<n;i++) nb.input(data.instance(i)); System.out.println(" Instances input"); System.out.println(" Output format retrieved"); // nd=Filter.useFilter(data,nb); // System.out.println(" Filtered? num atts = "+nd.numAttributes()+" num inst = "+nd.numInstances()+" filter = "+nb); if(nb.batchFinished()) System.out.println(" batch finished "); nd=nb.getOutputFormat(); for(int i=0;i<n;i++) { temp=nb.output(); // System.out.println(temp); nd.add(temp); } }catch(Exception e) { System.out.println("Error in estimateMissing = "+e.toString()); nd=data; System.exit(0); } return nd; }
Example #6
Source File: FilteredCollectiveClassifier.java From collective-classification-weka-package with GNU General Public License v3.0 | 5 votes |
/**
 * Initializes all member variables to their default values.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // defaults: impute missing values, classify with YATSI
  m_Filter      = new ReplaceMissingValues();
  m_Classifier  = new YATSI();
  m_TrainsetNew = null;
  m_TestsetNew  = null;
}
Example #7
Source File: MakeDensityBasedClusterer.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Builds a clusterer for a set of instances.
 *
 * @param data the instances to train the clusterer with
 * @throws Exception if the clusterer hasn't been set or something goes wrong
 */
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // impute missing values before handing the data to the wrapped clusterer
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(data);
  data = weka.filters.Filter.useFilter(data, m_replaceMissing);

  // keep an empty copy of the (filtered) header for later use
  m_theInstances = new Instances(data, 0);
  if (m_wrappedClusterer == null) {
    throw new Exception("No clusterer has been set");
  }
  m_wrappedClusterer.buildClusterer(data);

  // per-cluster, per-attribute distribution estimates:
  //   m_model       - discrete estimators for nominal attributes
  //   m_modelNormal - [0] = mean, [1] = std dev for numeric attributes
  //   weights       - total instance weight seen per cluster/attribute
  m_model = new DiscreteEstimator[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
  m_modelNormal = new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()][2];
  double[][] weights = new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
  m_priors = new double[m_wrappedClusterer.numberOfClusters()];
  for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
    m_priors[i] = 1.0; // laplace correction
    for (int j = 0; j < data.numAttributes(); j++) {
      if (data.attribute(j).isNominal()) {
        m_model[i][j] = new DiscreteEstimator(data.attribute(j).numValues(), true);
      }
    }
  }

  Instance inst = null;

  // Compute mean, etc.
  // First pass: assign each instance to a cluster, accumulate nominal
  // counts and weighted sums for the numeric means.
  int[] clusterIndex = new int[data.numInstances()];
  for (int i = 0; i < data.numInstances(); i++) {
    inst = data.instance(i);
    int cluster = m_wrappedClusterer.clusterInstance(inst);
    m_priors[cluster] += inst.weight();
    for (int j = 0; j < data.numAttributes(); j++) {
      if (!inst.isMissing(j)) {
        if (data.attribute(j).isNominal()) {
          m_model[cluster][j].addValue(inst.value(j), inst.weight());
        } else {
          // weighted sum; divided by the total weight below to get the mean
          m_modelNormal[cluster][j][0] += inst.weight() * inst.value(j);
          weights[cluster][j] += inst.weight();
        }
      }
    }
    clusterIndex[i] = cluster;
  }

  // turn the weighted sums into means
  for (int j = 0; j < data.numAttributes(); j++) {
    if (data.attribute(j).isNumeric()) {
      for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
        if (weights[i][j] > 0) {
          m_modelNormal[i][j][0] /= weights[i][j];
        }
      }
    }
  }

  // Compute standard deviations
  // Second pass: accumulate weighted squared deviations from the means.
  for (int i = 0; i < data.numInstances(); i++) {
    inst = data.instance(i);
    for (int j = 0; j < data.numAttributes(); j++) {
      if (!inst.isMissing(j)) {
        if (data.attribute(j).isNumeric()) {
          double diff = m_modelNormal[clusterIndex[i]][j][0] - inst.value(j);
          m_modelNormal[clusterIndex[i]][j][1] += inst.weight() * diff * diff;
        }
      }
    }
  }

  // finalize the standard deviations, guarding against degenerate values
  for (int j = 0; j < data.numAttributes(); j++) {
    if (data.attribute(j).isNumeric()) {
      for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
        if (weights[i][j] > 0) {
          m_modelNormal[i][j][1] = Math.sqrt(m_modelNormal[i][j][1] / weights[i][j]);
        } else if (weights[i][j] <= 0) {
          // no data for this cluster/attribute pair
          m_modelNormal[i][j][1] = Double.MAX_VALUE;
        }
        // fall back to the global std dev (and finally m_minStdDev)
        // when the per-cluster deviation is at or below the minimum
        if (m_modelNormal[i][j][1] <= m_minStdDev) {
          m_modelNormal[i][j][1] = data.attributeStats(j).numericStats.stdDev;
          if (m_modelNormal[i][j][1] <= m_minStdDev) {
            m_modelNormal[i][j][1] = m_minStdDev;
          }
        }
      }
    }
  }

  // turn the (laplace-corrected) cluster weights into prior probabilities
  Utils.normalize(m_priors);
}
Example #8
Source File: FT.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Builds the classifier. * * @param data the data to train with * @throws Exception if classifier can't be built successfully */ public void buildClassifier(Instances data) throws Exception{ // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class Instances filteredData = new Instances(data); filteredData.deleteWithMissingClass(); //replace missing values m_replaceMissing = new ReplaceMissingValues(); m_replaceMissing.setInputFormat(filteredData); filteredData = Filter.useFilter(filteredData, m_replaceMissing); //possibly convert nominal attributes globally if (m_convertNominal) { m_nominalToBinary = new NominalToBinary(); m_nominalToBinary.setInputFormat(filteredData); filteredData = Filter.useFilter(filteredData, m_nominalToBinary); } int minNumInstances = 2; //create a FT tree root if (m_modelType==0) m_tree = new FTNode( m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances, m_weightTrimBeta, m_useAIC); //create a FTLeaves tree root if (m_modelType==1){ m_tree = new FTLeavesNode(m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances, m_weightTrimBeta, m_useAIC); } //create a FTInner tree root if (m_modelType==2) m_tree = new FTInnerNode(m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances, m_weightTrimBeta, m_useAIC); //build tree m_tree.buildClassifier(filteredData); // prune tree m_tree.prune(); m_tree.assignIDs(0); m_tree.cleanup(); }
Example #9
Source File: OrbitModel.java From orbit-image-analysis with GNU General Public License v3.0 | 4 votes |
/**
 * Converts models saved with an old Weka version (&lt; 3.7.11) so they can be
 * used with a newer Weka. Detects the old format by probing the stored
 * structure, then rebuilds the dataset header and patches the wrapped SMO
 * classifier's internal fields via reflection. Recurses into the
 * segmentation, secondary segmentation and exclusion sub-models.
 *
 * @param model the model to fix in place (null is tolerated)
 */
public static void fixOldModelVersion(final OrbitModel model) {
    if (model == null) return; // nothing to fix

    // probe: old-version models blow up with an NPE when the class
    // attribute of the stored structure is accessed
    boolean oldWekaVersion = false;
    try {
        model.getStructure().classAttribute().numValues();
    } catch (NullPointerException ne) {
        oldWekaVersion = true;
    }

    // apply old model fix?
    if (oldWekaVersion) {
        logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes");
        int numClasses = model.getClassShapes().size();
        TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null);
        // +1 for the class attribute
        int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1;

        // rebuild the attribute list: numeric feature attributes a0..a(n-2)
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures);
        for (int a = 0; a < numFeatures - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            attrInfo.add(attr);
        }
        // nominal class attribute with one value per class shape
        List<String> classValues = new ArrayList<String>(numClasses);
        for (int i = 0; i < numClasses; i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        Attribute classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        // fresh (empty) header replacing the broken stored structure
        Instances structure = new Instances("trainSet pattern classes", attrInfo, 0);
        structure.setClassIndex(numFeatures - 1);
        model.setStructure(structure);

        try {
            if (model.getClassifier() != null && model.getClassifier().getClassifier() != null
                    && model.getClassifier().getClassifier() instanceof SMO) {
                SMO smo = ((SMO) model.getClassifier().getClassifier());

                // patch the SMO's private class attribute via reflection
                Field field = smo.getClass().getDeclaredField("m_classAttribute");
                field.setAccessible(true);
                field.set(smo, classAttr);

                // missing values
                // install a ReplaceMissingValues filter initialized on the new header
                ReplaceMissingValues rmv = new ReplaceMissingValues();
                rmv.setInputFormat(structure);
                Field missing = smo.getClass().getDeclaredField("m_Missing");
                missing.setAccessible(true);
                missing.set(smo, rmv);

                // filter
                // re-initialize the SMO's normalization filter: its locators and
                // output format live in a (grand)parent class, hence the
                // getSuperclass().getSuperclass() chain
                Field filter = smo.getClass().getDeclaredField("m_Filter");
                filter.setAccessible(true);
                Filter normalize = (Filter) filter.get(smo);
                RelationalLocator relLoc = new RelationalLocator(structure);
                StringLocator strLoc = new StringLocator(structure);
                Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_OutputRelAtts");
                outputRelAtts.setAccessible(true);
                outputRelAtts.set(normalize, relLoc);
                Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_InputRelAtts");
                inputRelAtts.setAccessible(true);
                inputRelAtts.set(normalize, relLoc);
                Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_OutputStringAtts");
                outputStrAtts.setAccessible(true);
                outputStrAtts.set(normalize, strLoc);
                Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_InputStringAtts");
                inputStrAtts.setAccessible(true);
                inputStrAtts.set(normalize, strLoc);
                Field outputFormat = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_OutputFormat");
                outputFormat.setAccessible(true);
                outputFormat.set(normalize, structure);
                logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now");
            } // else: good luck...
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("new weka version fixes could not be applied: " + e.getMessage());
        }
    } // old weka version

    // recurse into sub-models
    fixOldModelVersion(model.getSegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getSecondarySegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getExclusionModel()); // fixOldModelVersion can handle null
}
Example #10
Source File: KddCup.java From Machine-Learning-in-Java with MIT License | 4 votes |
public static Instances preProcessData(Instances data) throws Exception{ /* * Remove useless attributes */ RemoveUseless removeUseless = new RemoveUseless(); removeUseless.setOptions(new String[] { "-M", "99" }); // threshold removeUseless.setInputFormat(data); data = Filter.useFilter(data, removeUseless); /* * Remove useless attributes */ ReplaceMissingValues fixMissing = new ReplaceMissingValues(); fixMissing.setInputFormat(data); data = Filter.useFilter(data, fixMissing); /* * Remove useless attributes */ Discretize discretizeNumeric = new Discretize(); discretizeNumeric.setOptions(new String[] { "-O", "-M", "-1.0", "-B", "4", // no of bins "-R", "first-last"}); //range of attributes fixMissing.setInputFormat(data); data = Filter.useFilter(data, fixMissing); /* * Select only informative attributes */ InfoGainAttributeEval eval = new InfoGainAttributeEval(); Ranker search = new Ranker(); search.setOptions(new String[] { "-T", "0.001" }); // information gain threshold AttributeSelection attSelect = new AttributeSelection(); attSelect.setEvaluator(eval); attSelect.setSearch(search); // apply attribute selection attSelect.SelectAttributes(data); // remove the attributes not selected in the last run data = attSelect.reduceDimensionality(data); return data; }
Example #11
Source File: FilteredCollectiveClassifier.java From collective-classification-weka-package with GNU General Public License v3.0 | 4 votes |
/**
 * Parses a given list of options. Valid options are: <p/>
 *
 * -D <br/>
 * Turn on debugging output.<p/>
 *
 * -W classname <br/>
 * Specify the full class name of a classifier as the basis for
 * collective classifying (required).<p/>
 *
 * -folds folds <br/>
 * the number of folds for splitting the training set into train and test
 * set. the first fold is always the training set. With '-V' you can invert
 * this, i.e., instead of 20/80 for 5 folds you'll get 80/20. (default 5) <p/>
 *
 * -V <br/>
 * inverts the fold selection, i.e., instead of using the first fold for the
 * training set it is used for test set and the remaining folds for training.
 * <p/>
 *
 * -S seed <br/>
 * Random number seed for resampling (default 1). <p/>
 *
 * -verbose <br/>
 * whether to output some more information during improving the classifier.
 * <p/>
 *
 * -insight <br/>
 * whether to use the labels of the original test set to output more
 * statistics. <p/>
 *
 * -F class-spec <br/>
 * The classname and parameters for the filter
 * (default ReplaceMissingValues). <p/>
 *
 * Options after -- are passed to the designated classifier.<p/>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String filterSpec = Utils.getOption('F', options);
  if (filterSpec.length() == 0) {
    // no filter given: fall back to the default
    setFilter(new ReplaceMissingValues());
  } else {
    // first token is the filter class name, the rest are its options
    String[] filterOptions = Utils.splitOptions(filterSpec);
    String className = filterOptions[0];
    filterOptions[0] = "";
    setFilter((Filter) Utils.forName(Filter.class, className, filterOptions));
  }
  super.setOptions(options);
}
Example #12
Source File: YATSI.java From collective-classification-weka-package with GNU General Public License v3.0 | 4 votes |
/**
 * initializes the object
 * @param parent the parent algorithm
 * @param train the train instances
 * @param test the test instances
 * @param setWeights whether to set the weights for the training set
 *          (the processed instances)
 * @throws Exception if something goes wrong
 */
public YATSIInstances(YATSI parent, Instances train, Instances test, boolean setWeights) throws Exception {
  super();

  m_Parent = parent;

  // build sorted array (train + test)
  // each test instance gets a weight proportional to the train/test size
  // ratio times the configured weighting factor (or 1.0 if weighting is
  // disabled); train instances keep their original weight
  double weight;
  if (getParent().getNoWeights())
    weight = 1.0;
  else
    weight = (double) train.numInstances() / (double) test.numInstances() * getParent().getWeightingFactor();

  m_Unprocessed = new Instance[train.numInstances() + test.numInstances()];
  for (int i = 0; i < train.numInstances(); i++)
    m_Unprocessed[i] = train.instance(i);
  for (int i = 0; i < test.numInstances(); i++) {
    m_Unprocessed[train.numInstances() + i] = test.instance(i);
    m_Unprocessed[train.numInstances() + i].setWeight(weight);
  }
  // NOTE: sorting happens AFTER weighting, so the weight snapshot below
  // is taken in sorted order
  Arrays.sort(m_Unprocessed, m_Comparator);

  // weights
  // remember each instance's weight; optionally reset the instances
  // themselves to weight 1 (when setWeights is false)
  m_Weights = new double[m_Unprocessed.length];
  for (int i = 0; i < m_Unprocessed.length; i++) {
    m_Weights[i] = m_Unprocessed[i].weight();
    if (!setWeights)
      m_Unprocessed[i].setWeight(1);
  }

  // filter data
  // collect the combined, sorted instances into one dataset
  m_Trainset = new Instances(train, 0);
  for (int i = 0; i < m_Unprocessed.length; i++)
    m_Trainset.add(m_Unprocessed[i]);

  // set up filter
  m_Missing = new ReplaceMissingValues();
  m_Missing.setInputFormat(m_Trainset);
  m_Trainset = Filter.useFilter(m_Trainset, m_Missing);
}