Java Code Examples for weka.filters.Filter#useFilter()
The following examples show how to use
weka.filters.Filter#useFilter().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FilterAttribute.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 7 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff"); Instances dt = src.getDataSet(); String[] op = new String[]{"-R","2-4"}; Remove rmv = new Remove(); rmv.setOptions(op); rmv.setInputFormat(dt); Instances nd = Filter.useFilter(dt, rmv); ArffSaver s = new ArffSaver(); s.setInstances(nd); s.setFile(new File("fw.arff")); s.writeBatch(); } catch(Exception e){ System.out.println(e.getMessage()); } }
Example 2
Source File: Dl4JMlpFilterTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
protected void checkLayer(Dl4jMlpClassifier clf, Instances instances, String[] transformationLayerNames, String clfPath, boolean useZooModel) throws Exception { Instances activationsExpected = clf.getActivationsAtLayers(transformationLayerNames, instances); Dl4jMlpFilter filter = new Dl4jMlpFilter(); // Load the MNIST III if we're being called on the MNIST dataset (dataset is in meta format (String, class)) if (ImageInstanceIterator.isMetaArff(instances)) filter.setInstanceIterator(DatasetLoader.loadMiniMnistImageIterator()); filter.setSerializedModelFile(new File(clfPath)); filter.setTransformationLayerNames(transformationLayerNames); filter.setInputFormat(instances); filter.setPoolingType(PoolingType.NONE); Instances activationsActual = Filter.useFilter(instances, filter); for (int i = 0; i < activationsActual.size(); i++) { Instance expected = activationsExpected.get(i); Instance actual = activationsActual.get(i); for (int j = 0; j < expected.numAttributes(); j++) { assertEquals(expected.value(j), actual.value(j), 1e-6); } } }
Example 3
Source File: RISE.java From tsml with GNU General Public License v3.0 | 6 votes |
private Instances filterData(Instances result) throws Exception{ int maxLag=(result.numAttributes()-1)/4; if(maxLag>ACF.DEFAULT_MAXLAG) maxLag=ACF.DEFAULT_MAXLAG; Instances[] t=new Instances[filters.length]; for(int j=0;j<filters.length;j++){ // Im not sure this a sensible or robust way of doing this //What if L meant something else to the SimpleFilter? //Can you use a whole string, e.g. MAXLAG? filters[j].setOptions(new String[]{"L",maxLag+""}); filters[j].setInputFormat(result); t[j]=Filter.useFilter(result, filters[j]); } //4. Merge them all together Instances combo=new Instances(t[0]); for(int j=1;j<filters.length;j++){ if( j < filters.length){ combo.setClassIndex(-1); combo.deleteAttributeAt(combo.numAttributes()-1); } combo=Instances.mergeInstances(combo, t[j]); } combo.setClassIndex(combo.numAttributes()-1); return combo; }
Example 4
Source File: ConsistencySubsetEval.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Generates a attribute evaluator. Has to initialize all fields of the * evaluator that are not being set via options. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully */ public void buildEvaluator (Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = new Instances(data); m_trainInstances.deleteWithMissingClass(); m_classIndex = m_trainInstances.classIndex(); m_numAttribs = m_trainInstances.numAttributes(); m_numInstances = m_trainInstances.numInstances(); m_disTransform = new Discretize(); m_disTransform.setUseBetterEncoding(true); m_disTransform.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_disTransform); }
Example 5
Source File: TransformExamples.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Applies an {@code ACF} filter to the data, with the maximum lag set to a
 * quarter of the attribute count.
 *
 * <p>If the filter fails, the exception is printed and the JVM is terminated
 * with a non-zero exit status, so that calling scripts can detect the failure.
 *
 * @param data the data to transform
 * @return the transformed data
 */
public static Instances acfTransform(Instances data) {
    ACF acf = new ACF();
    acf.setMaxLag(data.numAttributes() / 4);
    Instances acfTrans = null;
    try {
        acf.setInputFormat(data);
        acfTrans = Filter.useFilter(data, acf);
    } catch (Exception e) {
        System.out.println(" Exception in ACF harness=" + e);
        e.printStackTrace();
        // Status 0 would report success to the caller; this is a failure path.
        System.exit(1);
    }
    return acfTrans;
}
Example 6
Source File: MDD.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Computes the distribution for a given exemplar * * @param exmp the exemplar for which distribution is computed * @return the distribution * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance exmp) throws Exception { // Extract the data Instances ins = exmp.relationalValue(1); if(m_Filter!=null) ins = Filter.useFilter(ins, m_Filter); ins = Filter.useFilter(ins, m_Missing); int nI = ins.numInstances(), nA = ins.numAttributes(); double[][] dat = new double [nI][nA]; for(int j=0; j<nI; j++){ for(int k=0; k<nA; k++){ dat[j][k] = ins.instance(j).value(k); } } // Compute the probability of the bag double [] distribution = new double[2]; distribution[1]=0.0; // Prob. for class 1 for(int i=0; i<nI; i++){ double exp = 0.0; for(int r=0; r<nA; r++) exp += (m_Par[r*2]-dat[i][r])*(m_Par[r*2]-dat[i][r])/ ((m_Par[r*2+1])*(m_Par[r*2+1])); exp = Math.exp(-exp); // Prob. updated for one instance distribution[1] += exp/(double)nI; distribution[0] += (1.0-exp)/(double)nI; } return distribution; }
Example 7
Source File: Dl4jStringToWord2VecTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/**
 * Loads the ReutersCorn training set and runs it through a
 * {@code Dl4jStringToWord2Vec} filter; the test passes if no exception is
 * thrown.
 *
 * @throws Exception if loading or filtering fails
 */
public void testReuters() throws Exception {
    final String arffPath = "datasets/text/ReutersCorn-train.arff";
    final Instances data = new ConverterUtils.DataSource(arffPath).getDataSet();

    Dl4jStringToWord2Vec word2VecFilter = new Dl4jStringToWord2Vec();
    word2VecFilter.setInputFormat(data);

    // The filtered output is not inspected.
    Filter.useFilter(data, word2VecFilter);
}
Example 8
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Fill the correlation matrix */ private void fillCorrelation() throws Exception { m_correlation = new double[m_numAttribs][m_numAttribs]; double [] att1 = new double [m_numInstances]; double [] att2 = new double [m_numInstances]; double corr; for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < m_numAttribs; j++) { for (int k = 0; k < m_numInstances; k++) { att1[k] = m_trainInstances.instance(k).value(i); att2[k] = m_trainInstances.instance(k).value(j); } if (i == j) { m_correlation[i][j] = 1.0; // store the standard deviation m_stdDevs[i] = Math.sqrt(Utils.variance(att1)); } else { corr = Utils.correlation(att1,att2,m_numInstances); m_correlation[i][j] = corr; m_correlation[j][i] = corr; } } } // now standardize the input data m_standardizeFilter = new Standardize(); m_standardizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter); }
Example 9
Source File: ZooModelTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/**
 * Runs the shrunken mini-MNIST meta dataset through a {@code Dl4jMlpFilter}
 * configured with the given zoo model.
 *
 * <p>An {@link OutOfMemoryError} is rethrown with an explanatory message. The
 * original error is attached as the cause so its stack trace is not lost.
 *
 * @param model the zoo model the filter should use
 * @throws Exception if the filter cannot be configured or applied
 */
private void filterModel(AbstractZooModel model) throws Exception {
    try {
        Dl4jMlpFilter myFilter = new Dl4jMlpFilter();
        ImageInstanceIterator iterator = DatasetLoader.loadMiniMnistImageIterator();
        Instances shrunkenInstances = shrinkInstances(DatasetLoader.loadMiniMnistMeta());

        myFilter.setZooModelType(model);
        myFilter.setInstanceIterator(iterator);
        myFilter.setInputFormat(shrunkenInstances);
        Filter.useFilter(shrunkenInstances, myFilter);
    } catch (OutOfMemoryError error) {
        OutOfMemoryError wrapped = new OutOfMemoryError(
                "Dl4jMlpFilter test ran out of memory, possibly due to running multiple tests.\n"
                        + " Please run this test individually to ensure this is the case and no other exceptions have occured.");
        // OutOfMemoryError has no (message, cause) constructor.
        wrapped.initCause(error);
        throw wrapped;
    }
}
Example 10
Source File: F.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * Remove Indices - removes ALL labels from D, assuming they are the first L
 * attributes.
 *
 * @param D dataset
 * @param L number of labels
 * @return new dataset with the labels removed
 * @throws Exception if the Remove filter cannot be applied
 */
public static Instances removeLabels(Instances D, int L) throws Exception {
    final Remove labelStripper = new Remove();
    // Attribute ranges are 1-based: "1-L" covers the first L attributes.
    final String labelRange = "1-" + L;
    labelStripper.setAttributeIndices(labelRange);
    labelStripper.setInputFormat(D);
    return Filter.useFilter(D, labelStripper);
}
Example 11
Source File: Dl4jMlpClassifier.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/** * Apply the filters to the given Instances * * @param insts Instances that are going to be filtered * @return Filtered Instances * @throws Exception Filter could not be applied */ protected Instances applyFilters(Instances insts) throws Exception { // Filter the instance insts = Filter.useFilter(insts, replaceMissingFilter); insts = Filter.useFilter(insts, nominalToBinaryFilter); if (filter != null) { insts = Filter.useFilter(insts, filter); } return insts; }
Example 12
Source File: WekaFeatureSelectionTest.java From Java-Data-Science-Cookbook with MIT License | 5 votes |
/**
 * Runs attribute selection on the {@code iris} data through the supervised
 * {@code AttributeSelection} filter, using {@code CfsSubsetEval} as evaluator
 * and {@code BestFirst} as search, and prints the reduced dataset.
 *
 * <p>Failures are reported on standard error instead of being silently
 * discarded.
 */
public void selectFeaturesWithFilter() {
    weka.filters.supervised.attribute.AttributeSelection filter =
            new weka.filters.supervised.attribute.AttributeSelection();
    CfsSubsetEval eval = new CfsSubsetEval();
    BestFirst search = new BestFirst();
    filter.setEvaluator(eval);
    filter.setSearch(search);
    try {
        filter.setInputFormat(iris);
        Instances newData = Filter.useFilter(iris, filter);
        System.out.println(newData);
    } catch (Exception e) {
        // Previously an empty catch block: a failed selection produced no
        // output at all, which looked the same as never running.
        System.err.println("Attribute selection with filter failed: " + e);
    }
}
Example 13
Source File: FTtree.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Returns a numeric version of a set of instances.
 *
 * <p>A copy of the training data is passed through a {@code NominalToBinary}
 * filter and then handed to the superclass implementation. According to the
 * original documentation, the class variable is replaced by a pseudo-class
 * variable that is used by LogitBoost.
 *
 * @param train the training instances
 * @return the numeric version produced by the superclass
 * @throws Exception if the filter cannot be applied
 */
protected Instances getNumericData(Instances train) throws Exception {
    Instances workingCopy = new Instances(train);

    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(workingCopy);

    Instances binarized = Filter.useFilter(workingCopy, m_nominalToBinary);
    return super.getNumericData(binarized);
}
Example 14
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 4 votes |
private void fillCovariance() throws Exception { // first store the means m_means = new double[m_trainInstances.numAttributes()]; m_stdDevs = new double[m_trainInstances.numAttributes()]; for (int i = 0; i < m_trainInstances.numAttributes(); i++) { m_means[i] = m_trainInstances.meanOrMode(i); } if (!m_center) { fillCorrelation(); return; } double[] att = new double[m_trainInstances.numInstances()]; // now center the data by subtracting the mean m_centerFilter = new Center(); m_centerFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_centerFilter); // now compute the covariance matrix m_correlation = new double[m_numAttribs][m_numAttribs]; for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < m_numAttribs; j++) { double cov = 0; for (int k = 0; k < m_numInstances; k++) { if (i == j) { cov += (m_trainInstances.instance(k).value(i) * m_trainInstances.instance(k).value(i)); } else { cov += (m_trainInstances.instance(k).value(i) * m_trainInstances.instance(k).value(j)); } } cov /= (double)(m_trainInstances.numInstances() - 1); m_correlation[i][j] = cov; m_correlation[j][i] = cov; } } }
Example 15
Source File: Dl4jMlpTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes |
@Test public void testTextCnnClassification() throws Exception { CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator(); cnnTextIter.setTrainBatchSize(128); cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors()); clf.setInstanceIterator(cnnTextIter); cnnTextIter.initialize(); final WordVectors wordVectors = cnnTextIter.getWordVectors(); int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length; ConvolutionLayer conv1 = new ConvolutionLayer(); conv1.setKernelSize(new int[]{4, vectorSize}); conv1.setNOut(10); conv1.setStride(new int[]{1, vectorSize}); conv1.setConvolutionMode(ConvolutionMode.Same); conv1.setActivationFunction(new ActivationReLU()); BatchNormalization bn1 = new BatchNormalization(); ConvolutionLayer conv2 = new ConvolutionLayer(); conv2.setKernelSize(new int[]{3, vectorSize}); conv2.setNOut(10); conv2.setStride(new int[]{1, vectorSize}); conv2.setConvolutionMode(ConvolutionMode.Same); conv2.setActivationFunction(new ActivationReLU()); BatchNormalization bn2 = new BatchNormalization(); ConvolutionLayer conv3 = new ConvolutionLayer(); conv3.setKernelSize(new int[]{2, vectorSize}); conv3.setNOut(10); conv3.setStride(new int[]{1, vectorSize}); conv3.setConvolutionMode(ConvolutionMode.Same); conv3.setActivationFunction(new ActivationReLU()); BatchNormalization bn3 = new BatchNormalization(); GlobalPoolingLayer gpl = new GlobalPoolingLayer(); OutputLayer out = new OutputLayer(); // clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out); clf.setLayers(conv1, conv2, conv3, gpl, out); // clf.setNumEpochs(50); clf.setCacheMode(CacheMode.MEMORY); final EpochListener l = new EpochListener(); l.setN(1); clf.setIterationListener(l); clf.setEarlyStopping(new EarlyStopping(10, 15)); clf.setDebug(true); // NNC NeuralNetConfiguration nnc = new NeuralNetConfiguration(); nnc.setL2(1e-3); final Dropout dropout = new Dropout(); dropout.setP(0.2); nnc.setDropout(dropout); 
clf.setNeuralNetConfiguration(nnc); // Data final Instances data = DatasetLoader.loadImdb(); data.randomize(new Random(42)); RemovePercentage rp = new RemovePercentage(); rp.setInputFormat(data); rp.setPercentage(98); final Instances dataFiltered = Filter.useFilter(data, rp); TestUtil.holdout(clf, dataFiltered); }
Example 16
Source File: DecisionTreeEstimator.java From jMetal with MIT License | 4 votes |
public double doPrediction(int index,S testSolution) { double result = 0.0d; try { int numberOfObjectives = solutionList.get(0).getNumberOfObjectives(); //Attributes //numeric Attribute attr = new Attribute("my-numeric"); //nominal ArrayList<String> myNomVals = new ArrayList<>(); for (int i=0; i<numberOfObjectives; i++) myNomVals.add(VALUE_STRING+i); Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals); //System.out.println(attr1.isNominal()); //string Attribute attr2 = new Attribute(MY_STRING, (List<String>)null); //System.out.println(attr2.isString()); //2.create dataset ArrayList<Attribute> attrs = new ArrayList<>(); attrs.add(attr); attrs.add(attr1); attrs.add(attr2); Instances dataset = new Instances("my_dataset", attrs, 0); //Add instances for (S solution : solutionList) { //instaces for (int i = 0; i <numberOfObjectives ; i++) { double[] attValues = new double[dataset.numAttributes()]; attValues[0] = solution.getObjective(i); attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING+i); attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString()+i); dataset.add(new DenseInstance(1.0, attValues)); } } //DataSet test Instances datasetTest = new Instances("my_dataset_test", attrs, 0); //Add instances for (int i = 0; i < numberOfObjectives; i++) { Instance test = new DenseInstance(3); test.setValue(attr, testSolution.getObjective(i)); test.setValue(attr1, VALUE_STRING+i); test.setValue(attr2, testSolution.toString()+i); datasetTest.add(test); // dataset.add(test); } //split to 70:30 learn and test set //Preprocess strings (almost no classifier supports them) StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(dataset); dataset = Filter.useFilter(dataset, filter); //Buid classifier dataset.setClassIndex(1); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //resample if needed //dataset = dataset.resample(new Random(42)); dataset.setClassIndex(1); datasetTest.setClassIndex(1); 
//do eval Evaluation eval = new Evaluation(datasetTest); //trainset eval.evaluateModel(classifier, datasetTest); //testset result = classifier.classifyInstance(datasetTest.get(index)); } catch (Exception e) { result = testSolution.getObjective(index); } return result; }
Example 17
Source File: StackingC.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Method that builds meta level. * * @param newData the data to work with * @param random the random number generator to use for cross-validation * @throws Exception if generation fails */ protected void generateMetaLevel(Instances newData, Random random) throws Exception { Instances metaData = metaFormat(newData); m_MetaFormat = new Instances(metaData, 0); for (int j = 0; j < m_NumFolds; j++) { Instances train = newData.trainCV(m_NumFolds, j, random); // Build base classifiers for (int i = 0; i < m_Classifiers.length; i++) { getClassifier(i).buildClassifier(train); } // Classify test instances and add to meta data Instances test = newData.testCV(m_NumFolds, j); for (int i = 0; i < test.numInstances(); i++) { metaData.add(metaInstance(test.instance(i))); } } m_MetaClassifiers = AbstractClassifier.makeCopies(m_MetaClassifier, m_BaseFormat.numClasses()); int [] arrIdc = new int[m_Classifiers.length + 1]; arrIdc[m_Classifiers.length] = metaData.numAttributes() - 1; Instances newInsts; for (int i = 0; i < m_MetaClassifiers.length; i++) { for (int j = 0; j < m_Classifiers.length; j++) { arrIdc[j] = m_BaseFormat.numClasses() * j + i; } m_makeIndicatorFilter = new weka.filters.unsupervised.attribute.MakeIndicator(); m_makeIndicatorFilter.setAttributeIndex("" + (metaData.classIndex() + 1)); m_makeIndicatorFilter.setNumeric(true); m_makeIndicatorFilter.setValueIndex(i); m_makeIndicatorFilter.setInputFormat(metaData); newInsts = Filter.useFilter(metaData,m_makeIndicatorFilter); m_attrFilter = new weka.filters.unsupervised.attribute.Remove(); m_attrFilter.setInvertSelection(true); m_attrFilter.setAttributeIndicesArray(arrIdc); m_attrFilter.setInputFormat(m_makeIndicatorFilter.getOutputFormat()); newInsts = Filter.useFilter(newInsts,m_attrFilter); newInsts.setClassIndex(newInsts.numAttributes()-1); m_MetaClassifiers[i].buildClassifier(newInsts); } }
Example 18
Source File: KddCup.java From Machine-Learning-in-Java with MIT License | 4 votes |
public static Instances preProcessData(Instances data) throws Exception{ /* * Remove useless attributes */ RemoveUseless removeUseless = new RemoveUseless(); removeUseless.setOptions(new String[] { "-M", "99" }); // threshold removeUseless.setInputFormat(data); data = Filter.useFilter(data, removeUseless); /* * Remove useless attributes */ ReplaceMissingValues fixMissing = new ReplaceMissingValues(); fixMissing.setInputFormat(data); data = Filter.useFilter(data, fixMissing); /* * Remove useless attributes */ Discretize discretizeNumeric = new Discretize(); discretizeNumeric.setOptions(new String[] { "-O", "-M", "-1.0", "-B", "4", // no of bins "-R", "first-last"}); //range of attributes fixMissing.setInputFormat(data); data = Filter.useFilter(data, fixMissing); /* * Select only informative attributes */ InfoGainAttributeEval eval = new InfoGainAttributeEval(); Ranker search = new Ranker(); search.setOptions(new String[] { "-T", "0.001" }); // information gain threshold AttributeSelection attSelect = new AttributeSelection(); attSelect.setEvaluator(eval); attSelect.setSearch(search); // apply attribute selection attSelect.SelectAttributes(data); // remove the attributes not selected in the last run data = attSelect.reduceDimensionality(data); return data; }
Example 19
Source File: DataTableModel.java From meka with GNU General Public License v3.0 | 4 votes |
/** * sets the attribute at the given col index as the new class attribute, i.e. * it moves it to the end of the attributes * * @param columnIndex the index of the column */ public void attributeAsClassAt(int columnIndex) { Reorder reorder; String order; int i; if ((columnIndex > 0) && (columnIndex < getColumnCount())) { addUndoPoint(); try { // build order string (1-based!) order = ""; for (i = 1; i < m_Data.numAttributes() + 1; i++) { // skip new class if (i == columnIndex) { continue; } if (!order.equals("")) { order += ","; } order += Integer.toString(i); } if (!order.equals("")) { order += ","; } order += Integer.toString(columnIndex); // process data reorder = new Reorder(); reorder.setAttributeIndices(order); reorder.setInputFormat(m_Data); m_Data = Filter.useFilter(m_Data, reorder); // set class index m_Data.setClassIndex(m_Data.numAttributes() - 1); } catch (Exception e) { e.printStackTrace(); undo(); } notifyListener(new TableModelEvent(this, TableModelEvent.HEADER_ROW)); } }
Example 20
Source File: RegressionTask.java From Machine-Learning-in-Java with MIT License | 4 votes |
public static void main(String[] args) throws Exception { /* * Load data */ CSVLoader loader = new CSVLoader(); loader.setFieldSeparator(","); loader.setSource(new File("data/ENB2012_data.csv")); Instances data = loader.getDataSet(); // System.out.println(data); /* * Build regression models */ // set class index to Y1 (heating load) data.setClassIndex(data.numAttributes() - 2); // remove last attribute Y2 Remove remove = new Remove(); remove.setOptions(new String[] { "-R", data.numAttributes() + "" }); remove.setInputFormat(data); data = Filter.useFilter(data, remove); // build a regression model LinearRegression model = new LinearRegression(); model.buildClassifier(data); System.out.println(model); // 10-fold cross-validation Evaluation eval = new Evaluation(data); eval.crossValidateModel(model, data, 10, new Random(1), new String[] {}); System.out.println(eval.toSummaryString()); double coef[] = model.coefficients(); System.out.println(); // build a regression tree model M5P md5 = new M5P(); md5.setOptions(new String[] { "" }); md5.buildClassifier(data); System.out.println(md5); // 10-fold cross-validation eval.crossValidateModel(md5, data, 10, new Random(1), new String[] {}); System.out.println(eval.toSummaryString()); System.out.println(); /* * Bonus: Build additional models */ // ZeroR modelZero = new ZeroR(); // // // // // // REPTree modelTree = new REPTree(); // modelTree.buildClassifier(data); // System.out.println(modelTree); // eval = new Evaluation(data); // eval.crossValidateModel(modelTree, data, 10, new Random(1), new // String[]{}); // System.out.println(eval.toSummaryString()); // // SMOreg modelSVM = new SMOreg(); // // MultilayerPerceptron modelPerc = new MultilayerPerceptron(); // // GaussianProcesses modelGP = new GaussianProcesses(); // modelGP.buildClassifier(data); // System.out.println(modelGP); // eval = new Evaluation(data); // eval.crossValidateModel(modelGP, data, 10, new Random(1), new // String[]{}); // 
System.out.println(eval.toSummaryString()); /* * Bonus: Save ARFF */ // ArffSaver saver = new ArffSaver(); // saver.setInstances(data); // saver.setFile(new File(args[1])); // saver.setDestination(new File(args[1])); // saver.writeBatch(); }