Java Code Examples for weka.core.Instances#add()
The following examples show how to use
weka.core.Instances#add().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataSetUtilsTest.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
public void cifar10InstancesAttributesTest() { ArrayList<Attribute> atts = new ArrayList<>(); for (int i = 0; i < 32 * 32 * 3 + 1; i++) { atts.add(new Attribute("blub" + i)); } Instances instances = new Instances("test", atts, 1); DenseInstance inst = new DenseInstance(atts.size()); for (int i = 0; i < inst.numAttributes(); i++) { inst.setValue(i, 1d); } inst.setDataset(instances); instances.add(inst); INDArray result = DataSetUtils.cifar10InstanceToMatrix(inst); Assert.assertArrayEquals(new long[]{32, 32, 3}, result.shape()); }
Example 2
Source File: AbstractEnsemble.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * @return the distributions of each individual module, i.e [0] = first module's dist, [1] = second... */ public double[][] distributionForInstanceByConstituents(Instance instance) throws Exception{ Instance ins = instance; if(this.transform!=null){ Instances rawContainer = new Instances(instance.dataset(),0); rawContainer.add(instance); Instances converted = transform.process(rawContainer); ins = converted.instance(0); } double[][] distsByClassifier = new double[this.modules.length][]; for(int i=0;i<modules.length;i++){ distsByClassifier[i] = modules[i].getClassifier().distributionForInstance(ins); } return distsByClassifier; }
Example 3
Source File: BestConf.java From bestconf with Apache License 2.0 | 5 votes |
public static void testCOMT2() throws Exception{ BestConf bestconf = new BestConf(); Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff"); trainingSet.setClassIndex(trainingSet.numAttributes()-1); Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(), InitialSampleSetSize, false); samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes()); samplePoints.setClassIndex(samplePoints.numAttributes()-1); COMT2 comt = new COMT2(samplePoints, COMT2Iteration); comt.buildClassifier(trainingSet); Evaluation eval = new Evaluation(trainingSet); eval.evaluateModel(comt, trainingSet); System.err.println(eval.toSummaryString()); Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance()); Instances bestInstances = new Instances(trainingSet,2); bestInstances.add(best); DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances); //now we output the training set with the class value updated as the predicted value Instances output = new Instances(trainingSet, trainingSet.numInstances()); Enumeration<Instance> enu = trainingSet.enumerateInstances(); while(enu.hasMoreElements()){ Instance ins = enu.nextElement(); double[] values = ins.toDoubleArray(); values[values.length-1] = comt.classifyInstance(ins); output.add(ins.copy(values)); } DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output); }
Example 4
Source File: KDTree.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Returns the k nearest neighbours of the supplied instance. * >k neighbours are returned if there are more than one * neighbours at the kth boundary. * * @param target The instance to find the nearest neighbours for. * @param k The number of neighbours to find. * @return The k nearest neighbours (or >k if more there are than * one neighbours at the kth boundary). * @throws Exception if the nearest neighbour could not be found. */ public Instances kNearestNeighbours(Instance target, int k) throws Exception { checkMissing(target); if (m_Stats != null) m_Stats.searchStart(); MyHeap heap = new MyHeap(k); findNearestNeighbours(target, m_Root, k, heap, 0.0); if (m_Stats != null) m_Stats.searchFinish(); Instances neighbours = new Instances(m_Instances, (heap.size() + heap .noOfKthNearest())); m_DistanceList = new double[heap.size() + heap.noOfKthNearest()]; int[] indices = new int[heap.size() + heap.noOfKthNearest()]; int i = indices.length - 1; MyHeapElement h; while (heap.noOfKthNearest() > 0) { h = heap.getKthNearest(); indices[i] = h.index; m_DistanceList[i] = h.distance; i--; } while (heap.size() > 0) { h = heap.get(); indices[i] = h.index; m_DistanceList[i] = h.distance; i--; } m_DistanceFunction.postProcessDistances(m_DistanceList); for (int idx = 0; idx < indices.length; idx++) { neighbours.add(m_Instances.instance(indices[idx])); } return neighbours; }
Example 5
Source File: ZooModelTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
private Instances shrinkInstances(Instances data) { ArrayList<Attribute> atts = new ArrayList<>(); for (int i = 0; i < data.numAttributes(); i++) { atts.add(data.attribute(i)); } Instances shrunkenData = new Instances("shrinked", atts, 10); shrunkenData.setClassIndex(1); for (int i = 0; i < 10; i++) { Instance inst = data.get(i); inst.setClassValue(i % 10); inst.setDataset(shrunkenData); shrunkenData.add(inst); } return shrunkenData; }
Example 6
Source File: FeatureGeneratorTree.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override public Instance apply(final Instance data) throws PreprocessingException { try { Instances instances = new Instances(data.dataset()); instances.clear(); instances.add(data); return this.apply(instances).firstInstance(); } catch (Exception e) { throw new PreprocessingException(e); } }
Example 7
Source File: Standardization.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override public Instances apply(final Instances data) throws PreprocessingException { Instances newInstances = new Instances(data); newInstances.clear(); for (Instance i : data) { newInstances.add(this.apply(i)); } return newInstances; }
Example 8
Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
public static Instances mergeClassesOfInstances(final Instances data, final Collection<String> cluster1, final Collection<String> cluster2) { Instances newData = WekaUtil.getEmptySetOfInstancesWithRefactoredClass(data); for (Instance i : data) { Instance iNew = (Instance) i.copy(); String className = i.classAttribute().value((int) Math.round(i.classValue())); if (cluster1.contains(className)) { iNew.setClassValue(0.0); newData.add(iNew); } else if (cluster2.contains(className)) { iNew.setClassValue(1.0); newData.add(iNew); } } return newData; }
Example 9
Source File: BinaryTransform.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public Instances process(Instances data) throws Exception{ Instances output = determineOutputFormat(data); if(findNewSplits){ splits=new double[data.numAttributes()]; double[] classes=new double[data.numInstances()]; for(int i=0;i<classes.length;i++) classes[i]=data.instance(i).classValue(); for (int j=0; j< data.numAttributes(); j++) { // for each data if(j!=data.classIndex()){ //Get values of attribute j double[] vals=new double[data.numInstances()]; for(int i=0;i<data.numInstances();i++) vals[i]=data.instance(i).value(j); //find the IG split point splits[j] =findSplitValue(data,vals,classes); } } findNewSplits=false; } //Extract out the terms and set the attributes for(int i=0;i<data.numInstances();i++){ Instance newInst=new DenseInstance(data.numAttributes()); for(int j=0;j<data.numAttributes();j++){ if(j!=data.classIndex()){ if(data.instance(i).value(j)<splits[j]) newInst.setValue(j,0); else newInst.setValue(j,1); } else newInst.setValue(j,data.instance(i).classValue()); } output.add(newInst); } return output; }
Example 10
Source File: MLSophisticatedPipeline.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Override public Instances apply(final Instances data) throws PreprocessingException{ Instances probingResults = new Instances(this.getEmptyProbingResultDataset()); for (Instance inst : data) { Instance probedInst = this.apply(inst); probedInst.setDataset(probingResults); probingResults.add(probedInst); } return probingResults; }
Example 11
Source File: C45Loader.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Return the full data set. If the structure hasn't yet been determined by a * call to getStructure then method should do so before processing the rest of * the data set. * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ @Override public Instances getDataSet() throws IOException { if (m_sourceFile == null) { throw new IOException("No source has been specified"); } if (getRetrieval() == INCREMENTAL) { throw new IOException( "Cannot mix getting Instances in both incremental and batch modes"); } setRetrieval(BATCH); if (m_structure == null) { getStructure(); } StreamTokenizer st = new StreamTokenizer(m_dataReader); initTokenizer(st); // st.ordinaryChar('.'); Instances result = new Instances(m_structure); Instance current = getInstance(st); while (current != null) { result.add(current); current = getInstance(st); } try { // close the stream m_dataReader.close(); // reset(); } catch (Exception ex) { ex.printStackTrace(); } return result; }
Example 12
Source File: BoTSWEnsemble.java From tsml with GNU General Public License v3.0 | 5 votes |
public double[] distributionForInstanceSVM(Instance instnc) throws Exception { BoTSW_Bag testBag = buildTestBag(instnc); //classify Instances testBagData = new Instances(bagData, 1); double[] inst = new double[params.k+1]; for (int j = 0; j < params.k; ++j) inst[j] = testBag.hist[j]; inst[inst.length-1] = testBag.classValue; testBagData.add(new DenseInstance(1, inst)); return svm.distributionForInstance(testBagData.get(0)); }
Example 13
Source File: LHSInitializer.java From bestconf with Apache License 2.0 | 4 votes |
/** * At current version, we assume all attributes are numeric attributes with bounds * * Let PACE be upper-lower DIVided by the sampleSetSize * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){ int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist=null; //generate L sets of sampleSetSize points for(int i=0; i<L; i++){ ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if(crntMinDist>maxMinDist){ setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for(int i=0;i<bounds.length;i++){ crntAttr = itr.next(); bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize; for(int j=1;j<sampleSetSize;j++){ bounds[i][j] = bounds[i][j-1] + pace; } } //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for(int i=0;i<sampleSetSize;i++){ double[] vals = new double[atts.size()]; for(int j=0;j<vals.length;j++){ vals[j] = useMid? 
(bounds[j][setWithMaxMinDist[j].get(i)]+bounds[j][setWithMaxMinDist[j].get(i)+1])/2: bounds[j][setWithMaxMinDist[j].get(i)]+ ( (bounds[j][setWithMaxMinDist[j].get(i)+1]-bounds[j][setWithMaxMinDist[j].get(i)])*uniRand.nextDouble() ); } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
Example 14
Source File: UnionFilter.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
static DataSet union(final DataSet coll1, final DataSet coll2) { if (coll1 == null || coll2 == null) { throw new IllegalArgumentException("Parameters 'coll1' and 'coll2' must not be null!"); } if (coll1.getIntermediateInstances() == null || coll2.getIntermediateInstances() == null) { // Merge Weka instances Instances instances1 = coll1.getInstances(); Instances instances2 = coll2.getInstances(); if (instances1.numInstances() != instances2.numInstances()) { throw new IllegalArgumentException("Data sets to be united must have the same amount of instances!"); } ArrayList<Attribute> attributes = new ArrayList<>( coll1.getInstances().numAttributes() + coll2.getInstances().numAttributes() - 1); for (int i = 0; i < instances1.numAttributes() - 1; i++) { attributes.add(instances1.attribute(i).copy(instances1.attribute(i).name() + "u1")); } for (int i = 0; i < instances2.numAttributes() - 1; i++) { attributes.add(instances2.attribute(i).copy(instances2.attribute(i).name() + "u2")); } // Add class attribute List<String> classValues = IntStream.range(0, instances1.classAttribute().numValues()).asDoubleStream() .mapToObj(String::valueOf).collect(Collectors.toList()); Attribute classAtt = new Attribute("classAtt", classValues); attributes.add(classAtt); Instances unitedInstances = new Instances("UnitedInstances", attributes, instances1.numInstances()); unitedInstances.setClassIndex(unitedInstances.numAttributes() - 1); for (int i = 0; i < instances1.numInstances(); i++) { Instance instance = new DenseInstance(attributes.size()); instance.setDataset(unitedInstances); // Copy values int runningIndex = 0; for (int j = 0; j < instances1.numAttributes() - 1; j++) { instance.setValue(runningIndex++, instances1.get(i).value(j)); } for (int j = 0; j < instances2.numAttributes() - 1; j++) { instance.setValue(runningIndex++, instances2.get(i).value(j)); } instance.setClassValue(instances1.get(i).classValue()); unitedInstances.add(instance); } return new DataSet(unitedInstances, null); } 
else { if (coll1.getIntermediateInstances().isEmpty() || coll2.getIntermediateInstances().isEmpty()) { throw new IllegalArgumentException("There must be intermediate instances if the collection is set."); } // Merge intermediate instances List<INDArray> intermediateInsts1 = coll1.getIntermediateInstances(); List<INDArray> intermediateInsts2 = coll2.getIntermediateInstances(); List<INDArray> unitedIntermediateInsts = new ArrayList<>( (int) (intermediateInsts1.get(0).length() + intermediateInsts2.get(0).length())); for (int i = 0; i < intermediateInsts1.size(); i++) { INDArray intermediateInst = Nd4j.hstack(intermediateInsts1.get(i).ravel(), intermediateInsts2.get(i).ravel()); unitedIntermediateInsts.add(intermediateInst); } return new DataSet(coll1.getInstances(), unitedIntermediateInsts); } }
Example 15
Source File: DecisionTreeEstimator.java From jMetal with MIT License | 4 votes |
public double doPredictionVariable(int index,S testSolution) { double result = 0.0d; try { int numberOfVariables = solutionList.get(0).getNumberOfVariables(); //Attributes //numeric Attribute attr = new Attribute("my-numeric"); //nominal ArrayList<String> myNomVals = new ArrayList<>(); for (int i=0; i<numberOfVariables; i++) myNomVals.add(VALUE_STRING+i); Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals); //string Attribute attr2 = new Attribute(MY_STRING, (List<String>)null); //2.create dataset ArrayList<Attribute> attrs = new ArrayList<>(); attrs.add(attr); attrs.add(attr1); attrs.add(attr2); Instances dataset = new Instances("my_dataset", attrs, 0); //Add instances for (S solution : solutionList) { //instaces for (int i = 0; i <numberOfVariables ; i++) { double[] attValues = new double[dataset.numAttributes()]; attValues[0] = ((DoubleSolution)solution).getVariable(i); attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING+i); attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString()+i); dataset.add(new DenseInstance(1.0, attValues)); } } //DataSet test Instances datasetTest = new Instances("my_dataset_test", attrs, 0); //Add instances for (int i = 0; i < numberOfVariables; i++) { Instance test = new DenseInstance(3); test.setValue(attr, ((DoubleSolution)testSolution).getVariable(i)); test.setValue(attr1, VALUE_STRING+i); test.setValue(attr2, testSolution.toString()+i); datasetTest.add(test); // dataset.add(test); } //split to 70:30 learn and test set //Preprocess strings (almost no classifier supports them) StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(dataset); dataset = Filter.useFilter(dataset, filter); //Buid classifier dataset.setClassIndex(1); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //resample if needed //dataset = dataset.resample(new Random(42)); dataset.setClassIndex(1); datasetTest.setClassIndex(1); //do eval Evaluation eval = new 
Evaluation(datasetTest); //trainset eval.evaluateModel(classifier, datasetTest); //testset result = classifier.classifyInstance(datasetTest.get(index)); } catch (Exception e) { result = ((DoubleSolution)testSolution).getVariable(index); } return result; }
Example 16
Source File: ADTree.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Recursive function that carries out search for the best test (splitter) to add to * this part of the tree, by aiming to minimize the Z value. Performs Z-pure cutoff to * reduce search space. * * @param currentNode the root of the subtree to be searched, and the current node * being considered as parent of a new split * @param posInstances the positive-class instances that apply at this node * @param negInstances the negative-class instances that apply at this node * @exception Exception if search fails */ private void searchForBestTestSingle(PredictionNode currentNode, Instances posInstances, Instances negInstances) throws Exception { // don't investigate pure or empty nodes any further if (posInstances.numInstances() == 0 || negInstances.numInstances() == 0) return; // do z-pure cutoff if (calcZpure(posInstances, negInstances) >= m_search_smallestZ) return; // keep stats m_nodesExpanded++; m_examplesCounted += posInstances.numInstances() + negInstances.numInstances(); // evaluate static splitters (nominal) for (int i=0; i<m_nominalAttIndices.length; i++) evaluateNominalSplitSingle(m_nominalAttIndices[i], currentNode, posInstances, negInstances); // evaluate dynamic splitters (numeric) if (m_numericAttIndices.length > 0) { // merge the two sets of instances into one Instances allInstances = new Instances(posInstances); for (Enumeration e = negInstances.enumerateInstances(); e.hasMoreElements(); ) allInstances.add((Instance) e.nextElement()); // use method of finding the optimal Z split-point for (int i=0; i<m_numericAttIndices.length; i++) evaluateNumericSplitSingle(m_numericAttIndices[i], currentNode, posInstances, negInstances, allInstances); } if (currentNode.getChildren().size() == 0) return; // keep searching switch (m_searchPath) { case SEARCHPATH_ALL: goDownAllPathsSingle(currentNode, posInstances, negInstances); break; case SEARCHPATH_HEAVIEST: goDownHeaviestPathSingle(currentNode, posInstances, negInstances); break; case SEARCHPATH_ZPURE: 
goDownZpurePathSingle(currentNode, posInstances, negInstances); break; case SEARCHPATH_RANDOM: goDownRandomPathSingle(currentNode, posInstances, negInstances); break; } }
Example 17
Source File: LHSSampler.java From bestconf with Apache License 2.0 | 4 votes |
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){ int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist=null; //generate L sets of sampleSetSize points for(int i=0; i<L; i++){ ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if(crntMinDist>maxMinDist){ setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for(int i=0;i<bounds.length;i++){ crntAttr = itr.next(); if(crntAttr.isNumeric()){ bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize; for(int j=1;j<sampleSetSize;j++){ bounds[i][j] = bounds[i][j-1] + pace; } }else{//crntAttr.isNominal() if(crntAttr.numValues()>=sampleSetSize){ //randomly select among the set for(int j=0;j<=sampleSetSize;j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values }else{ //first round-robin int lastPart = sampleSetSize%crntAttr.numValues(); for(int j=0;j<sampleSetSize-lastPart;j++) 
bounds[i][j] = j%crntAttr.numValues(); //then randomly select for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } }//nominal attribute }//get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for(int i=0;i<sampleSetSize;i++){ double[] vals = new double[atts.size()]; for(int j=0;j<vals.length;j++){ if(atts.get(j).isNumeric()){ vals[j] = useMid? (bounds[j][setWithMaxMinDist[j].get(i)]+bounds[j][setWithMaxMinDist[j].get(i)+1])/2: bounds[j][setWithMaxMinDist[j].get(i)]+ ( (bounds[j][setWithMaxMinDist[j].get(i)+1]-bounds[j][setWithMaxMinDist[j].get(i)])*uniRand.nextDouble() ); }else{//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
Example 18
Source File: ArffFileFromRun.java From NLIWOD with GNU Affero General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { HAWK hawk = new HAWK(); SINA sina = new SINA(); QAKIS qakis = new QAKIS(); YODA yoda = new YODA(); /* * For multilable classification: */ ArrayList<String> fvhawk = new ArrayList<String>(); fvhawk.add("1"); fvhawk.add("0"); Attribute hawkatt = new Attribute("hawk", fvhawk); ArrayList<String> fvqakis = new ArrayList<String>(); fvqakis.add("1"); fvqakis.add("0"); Attribute qakisatt = new Attribute("qakis", fvqakis); ArrayList<String> fvyoda = new ArrayList<String>(); fvyoda.add("1"); fvyoda.add("0"); Attribute yodaatt = new Attribute("yoda", fvyoda); ArrayList<String> fvsina = new ArrayList<String>(); fvsina.add("1"); fvsina.add("0"); Attribute sinaatt = new Attribute("sina", fvsina); /* * */ // 1. Learn on the training data for each system a classifier to find // out which system can answer which question // 1.1 load the questions and how good each system answers log.debug("Load the questions and how good each system answers"); List<IQuestion> trainQuestions = LoaderController.load(Dataset.QALD6_Train_Multilingual); List<ASystem> systems = Lists.newArrayList(hawk, sina, qakis, yoda); JSONArray traindata = RunProducer.loadRunData(Dataset.QALD6_Train_Multilingual); // 1.2 calculate the features per question and system log.debug("Calculate the features per question and system"); Analyzer analyzer = new Analyzer(); ArrayList<Attribute> fvfinal = analyzer.fvWekaAttributes; fvfinal.add(0, hawkatt); fvfinal.add(0, yodaatt); fvfinal.add(0, sinaatt); fvfinal.add(0,qakisatt); Instances trainingSet = new Instances("training_classifier: -C 4" , fvfinal, trainQuestions.size()); log.debug("Start collection of training data for each system"); for (int i = 0; i < traindata.size(); i++) { JSONObject questiondata = (JSONObject) traindata.get(i); JSONObject allsystemsdata = (JSONObject) questiondata.get("answers"); String question = (String) questiondata.get("question"); Instance tmp = analyzer.analyze(question); 
tmp.setValue(hawkatt, 0); tmp.setValue(yodaatt, 0); tmp.setValue(sinaatt, 0); tmp.setValue(qakisatt, 0); for(ASystem system: systems){ JSONObject systemdata = (JSONObject) allsystemsdata.get(system.name()); if(new Double(systemdata.get("fmeasure").toString()) > 0) switch (system.name()){ case "hawk": tmp.setValue(hawkatt, 1); break; case "yoda": tmp.setValue(yodaatt, 1); break; case "sina": tmp.setValue(sinaatt, 1); break; case "qakis": tmp.setValue(qakisatt, 1); break; } } trainingSet.add(tmp); } log.debug(trainingSet.toString()); try (FileWriter file = new FileWriter("./src/main/resources/old/Train.arff")) { file.write(trainingSet.toString()); } catch (IOException e) { e.printStackTrace(); } }
Example 19
Source File: PropositionalToMultiInstance.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Signify that this batch of input to the filter is finished. * If the filter requires all instances prior to filtering, * output() may now be called to retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } Instances input = getInputFormat(); input.sort(0); // make sure that bagID is sorted Instances output = getOutputFormat(); Instances bagInsts = output.attribute(1).relation(); Instance inst = new DenseInstance(bagInsts.numAttributes()); inst.setDataset(bagInsts); double bagIndex = input.instance(0).value(0); double classValue = input.instance(0).classValue(); double bagWeight = 0.0; // Convert pending input instances for(int i = 0; i < input.numInstances(); i++) { double currentBagIndex = input.instance(i).value(0); // copy the propositional instance value, except the bagIndex and the class value for (int j = 0; j < input.numAttributes() - 2; j++) inst.setValue(j, input.instance(i).value(j + 1)); inst.setWeight(input.instance(i).weight()); if (currentBagIndex == bagIndex){ bagInsts.add(inst); bagWeight += inst.weight(); } else{ addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight); bagInsts = bagInsts.stringFreeStructure(); bagInsts.add(inst); bagIndex = currentBagIndex; classValue = input.instance(i).classValue(); bagWeight = inst.weight(); } } // reach the last instance, create and add the last bag addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight); if (getRandomize()) output.randomize(new Random(getSeed())); for (int i = 0; i < output.numInstances(); i++) push(output.instance(i)); // Free memory flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
Example 20
Source File: SAX.java From tsml with GNU General Public License v3.0 | 1 votes |
/** * Will perform a SAX transformation on a single data series passed as a double[], input format * must already be known. * * Generally to be used * in the SAX_1NN classifier (essentially a wrapper classifier that just feeds SAX-filtered * data to a 1NN classifier) to filter individual instances during testing * * Instance objects need the header info as well as the basic data * * @param alphabetSize size of SAX alphabet * @param numIntervals size of resulting word * @throws Exception */ public Instance convertInstance(Instance inst, int alphabetSize, int numIntervals) throws Exception { Instances newInsts = new Instances(inputFormat, 1); newInsts.add(inst); newInsts = process(newInsts); return newInsts.firstInstance(); }