Java Code Examples for weka.core.Instances#numClasses()
The following examples show how to use weka.core.Instances#numClasses(). For a nominal class attribute, numClasses() returns the number of distinct class labels; for a numeric class attribute it returns 1.
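Before the project examples, here is a minimal, self-contained sketch of the call itself. It is illustrative only: the file name iris.arff is a placeholder for any ARFF dataset with a nominal class attribute, not a file referenced by the examples below.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumClassesDemo {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a hypothetical path; substitute any ARFF file
        Instances data = DataSource.read("iris.arff");
        // numClasses() throws an exception if the class index has not been set
        data.setClassIndex(data.numAttributes() - 1);
        // Number of class labels (would be 1 if the class attribute were numeric)
        System.out.println("Number of classes: " + data.numClasses());
    }
}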
Example 1
Source File: TTC.java From tsml with GNU General Public License v3.0 | 6 votes

public static void main(String[] args) throws Exception {
    String dataset = "Trace";
    Instances inst = DatasetLoading.loadDataNullable("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TRAIN.arff");
    Instances inst2 = DatasetLoading.loadDataNullable("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TEST.arff");
//        Instances inst = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TRAIN.arff");
//        Instances inst2 = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TEST.arff");
    inst.setClassIndex(inst.numAttributes() - 1);
    inst.addAll(inst2);

    TTC k = new TTC();
    k.seed = 0;
    k.k = inst.numClasses();
    k.buildClusterer(inst);

    System.out.println(k.clusters.length);
    System.out.println(Arrays.toString(k.clusters));
    System.out.println(randIndex(k.assignments, inst));
}
Example 2
Source File: ShapeletTransformTimingUtilities.java From tsml with GNU General Public License v3.0 | 6 votes

public static ShapeletFilter createTransform(Instances train) {
    int numClasses = train.numClasses();
    int numInstances = train.numInstances() <= 2000 ? train.numInstances() : 2000;
    int numAttributes = train.numAttributes() - 1;

    ShapeletFilter transform;
    if (numClasses == 2) {
        transform = new ShapeletFilter();
    } else {
        transform = new BalancedClassShapeletFilter();
        transform.setClassValue(new BinaryClassValue());
    }

    //transform.setSubSeqDistance(new ImprovedOnlineShapeletDistance());
    transform.setShapeletMinAndMax(3, numAttributes);
    transform.setNumberOfShapelets(numInstances);
    transform.useCandidatePruning();
    transform.turnOffLog();
    transform.setRoundRobin(true);
    transform.supressOutput();
    return transform;
}
Example 3
Source File: FlipHistory.java From collective-classification-weka-package with GNU General Public License v3.0 | 6 votes |
/** * initializes the history * * @param inst the instance to initialize with */ public FlipHistory(Instances inst) { int i; // create arrays m_Instances = new Instance[inst.numInstances()]; m_Last = new double[inst.numInstances()][inst.numClasses()]; m_Average = new double[inst.numInstances()][inst.numClasses()]; m_Count = new int[inst.numInstances()]; // sort for (i = 0; i < inst.numInstances(); i++) m_Instances[i] = (Instance) inst.instance(i).copy(); Arrays.sort(m_Instances, m_Comparator); // init for (i = 0; i < m_Instances.length; i++) { m_Last[i][(int) m_Instances[i].classValue()] = 1.0; m_Average[i][(int) m_Instances[i].classValue()] = 1.0; } }
Example 4
Source File: UnsupervisedShapelets.java From tsml with GNU General Public License v3.0 | 6 votes

public static void main(String[] args) throws Exception {
    String dataset = "Trace";
    Instances inst = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\" + dataset + "\\" + dataset + "_TRAIN.arff");
    Instances inst2 = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\" + dataset + "\\" + dataset + "_TEST.arff");
    inst.setClassIndex(inst.numAttributes() - 1);
    inst.addAll(inst2);

    UnsupervisedShapelets us = new UnsupervisedShapelets();
    us.seed = 0;
    us.k = inst.numClasses();
    us.buildClusterer(inst);

    System.out.println(us.clusters.length);
    System.out.println(Arrays.toString(us.assignments));
    System.out.println(Arrays.toString(us.clusters));
    System.out.println(randIndex(us.assignments, inst));
}
Example 5
Source File: LPS.java From tsml with GNU General Public License v3.0 | 5 votes

/**
 * Backfits the given data into the tree.
 */
public void backfitData(Instances data) throws Exception {
    double totalWeight = 0;
    double totalSumSquared = 0;

    // Compute initial class counts
    double[] classProbs = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }

    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = RandomRegressionTree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Fit data into tree
    backfitData(data, classProbs, totalWeight);
}
Example 6
Source File: Sampling.java From tsml with GNU General Public License v3.0 | 5 votes

/**
 * Reorder the dataset by its largest class
 *
 * @param data the dataset to reorder
 * @return the reordered dataset
 */
public static Instances orderByLargestClass(Instances data) {
    Instances newData = new Instances(data, data.numInstances());

    // get the number of classes in the data
    int nbClass = data.numClasses();
    int[] instancePerClass = new int[nbClass];
    int[] labels = new int[nbClass];
    int[] classIndex = new int[nbClass];

    // sort the data based on its class attribute
    data.sort(data.classAttribute());

    // get the number of instances per class in the data
    for (int i = 0; i < nbClass; i++) {
        instancePerClass[i] = data.attributeStats(data.classIndex()).nominalCounts[i];
        labels[i] = i;
        if (i > 0)
            classIndex[i] = classIndex[i-1] + instancePerClass[i-1];
    }
    QuickSort.sort(instancePerClass, labels);

    for (int i = nbClass-1; i >= 0; i--) {
        for (int j = 0; j < instancePerClass[i]; j++) {
            newData.add(data.instance(classIndex[labels[i]] + j));
        }
    }
    return newData;
}
Example 7
Source File: Ridor.java From tsml with GNU General Public License v3.0 | 5 votes

/**
 * Builds a single rule learner with REP dealing with 2 classes.
 * This rule learner always tries to predict the class with label
 * m_Class.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    m_ClassAttribute = instances.classAttribute();
    if (!m_ClassAttribute.isNominal())
        throw new UnsupportedClassTypeException(" Only nominal class, please.");
    if (instances.numClasses() != 2)
        throw new Exception(" Only 2 classes, please.");

    Instances data = new Instances(instances);
    if (Utils.eq(data.sumOfWeights(), 0))
        throw new Exception(" No training data.");

    data.deleteWithMissingClass();
    if (Utils.eq(data.sumOfWeights(), 0))
        throw new Exception(" The class labels of all the training data are missing.");

    if (data.numInstances() < m_Folds)
        throw new Exception(" Not enough data for REP.");

    m_Antds = new FastVector();

    /* Split data into Grow and Prune */
    m_Random = new Random(m_Seed);
    data.randomize(m_Random);
    data.stratify(m_Folds);

    Instances growData = data.trainCV(m_Folds, m_Folds-1, m_Random);
    Instances pruneData = data.testCV(m_Folds, m_Folds-1);

    grow(growData);   // Build this rule
    prune(pruneData); // Prune this rule
}
Example 8
Source File: TunedXGBoost.java From tsml with GNU General Public License v3.0 | 5 votes

@Override
public void buildClassifier(Instances insts) throws Exception {
//        long startTime=System.nanoTime();
    long startTime = System.nanoTime();
    booster = null;
    trainResults = new ClassifierResults();
    trainInsts = new Instances(insts);
    numTrainInsts = insts.numInstances();
    numAtts = insts.numAttributes();
    numClasses = insts.numClasses();

    if (cvFolds > numTrainInsts)
        cvFolds = numTrainInsts;
//        rng = new Random(seed); //for tie resolution etc if needed

    buildActualClassifer();

    if (getEstimateOwnPerformance() && !tuneParameters) //if tuneparas, will take the cv results of the best para set
        trainResults = estimateTrainAcc(trainInsts);

    if (saveEachParaAcc)
        trainResults.setBuildTime(combinedBuildTime);
    else
        trainResults.setBuildTime(System.nanoTime() - startTime);
//        trainResults.buildTime=System.nanoTime()-startTime;

    trainResults.setTimeUnit(TimeUnit.NANOSECONDS);
    trainResults.setClassifierName(tuneParameters ? "TunedXGBoost" : "XGBoost");
    trainResults.setDatasetName(trainInsts.relationName());
    trainResults.setParas(getParameters());
}
Example 9
Source File: RnnTextEmbeddingInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes

@Test
public void testOutputFormat() throws Exception {
    Instances data = DatasetLoader.loadReutersMinimal();
    for (int tl : Arrays.asList(10, 50, 200)) {
        tii.setTruncateLength(tl);
        for (int bs : Arrays.asList(1, 4, 8, 16)) {
            final DataSetIterator it = tii.getDataSetIterator(data, TestUtil.SEED, bs);
            assertEquals(bs, it.batch());
            assertEquals(Arrays.asList("0", "1"), it.getLabels());
            final DataSet next = Utils.getNext(it);

            // Check feature shape, expect: (batchsize x wordvecsize x sequencelength)
            final long[] shapeFeats = next.getFeatures().shape();
            final long[] expShapeFeats = {bs, WORD_VEC_SIZE, tl};
            assertEquals(expShapeFeats[0], shapeFeats[0]);
            assertEquals(expShapeFeats[1], shapeFeats[1]);
            assertTrue(expShapeFeats[2] >= shapeFeats[2]);

            // Check label shape, expect: (batchsize x numclasses x sequencelength)
            final long[] shapeLabels = next.getLabels().shape();
            final long[] expShapeLabels = {bs, data.numClasses(), tl};
            assertEquals(expShapeLabels[0], shapeLabels[0]);
            assertEquals(expShapeLabels[1], shapeLabels[1]);
            assertTrue(expShapeLabels[2] >= shapeLabels[2]);
        }
    }
}
Example 10
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 5 votes

/**
 * by Tony
 * Public method to calculate the class distributions of a dataset.
 */
public static double[] findClassDistributions(Instances data) {
    double[] dist = new double[data.numClasses()];
    for (Instance d : data)
        dist[(int) d.classValue()]++;
    for (int i = 0; i < dist.length; i++)
        dist[i] /= data.numInstances();
    return dist;
}
Example 11
Source File: InstanceTools.java From tsml with GNU General Public License v3.0 | 5 votes

public static double[] classDistribution(Instances instances) {
    double[] distribution = new double[instances.numClasses()];
    for (Instance instance : instances) {
        distribution[(int) instance.classValue()]++;
    }
    normalise(distribution);
    return distribution;
}
Example 12
Source File: MLPipeline.java From AILibs with GNU Affero General Public License v3.0 | 5 votes

@Override
public void buildClassifier(Instances data) throws Exception {
    /* reduce dimensionality */
    long start;
    int numAttributesBefore = data.numAttributes();
    logger.info("Starting to build the preprocessors of the pipeline.");
    for (SupervisedFilterSelector pp : this.preprocessors) {
        /* if the filter has not been trained yet, do so now and store it */
        if (!pp.isPrepared()) {
            try {
                start = System.currentTimeMillis();
                pp.prepare(data);
                this.timeForTrainingPreprocessors = (int) (System.currentTimeMillis() - start);
                int newNumberOfClasses = pp.apply(data).numClasses();
                if (data.numClasses() != newNumberOfClasses) {
                    logger.info("{} changed number of classes from {} to {}", pp.getSelector(), data.numClasses(), newNumberOfClasses);
                }
            } catch (NullPointerException e) {
                logger.error("Could not apply preprocessor", e);
            }
        }

        /* now apply the attribute selector */
        data = pp.apply(data);
    }
    logger.info("Reduced number of attributes from {} to {}", numAttributesBefore, data.numAttributes());

    /* build classifier based on reduced data */
    start = System.currentTimeMillis();
    super.getClassifier().buildClassifier(data);
    this.timeForTrainingClassifier = (int) (System.currentTimeMillis() - start);
    this.trained = true;
    this.timeForExecutingPreprocessors = new DescriptiveStatistics();
    this.timeForExecutingClassifier = new DescriptiveStatistics();
}
Example 13
Source File: AbstractTextEmbeddingIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes

/**
 * Create a sentence provider from the given data.
 *
 * @param data Data
 * @return Sentence provider
 */
public LabeledSentenceProvider getSentenceProvider(Instances data) {
    List<String> sentences = new ArrayList<>();
    List<String> labels = new ArrayList<>();
    final int clsIdx = data.classIndex();
    for (Instance inst : data) {
        labels.add(String.valueOf(inst.value(clsIdx)));
        sentences.add(inst.stringValue(1 - clsIdx));
    }
    return new CollectionLabeledSentenceProvider(sentences, labels, data.numClasses());
}
Example 14
Source File: Utils.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes

/**
 * Converts a set of training instances to a DataSet. Assumes that the instances have been
 * suitably preprocessed - i.e. missing values replaced and nominals converted to binary/numeric.
 * Also assumes that the class index has been set.
 *
 * @param insts the instances to convert
 * @return a DataSet
 */
public static DataSet instancesToDataSet(Instances insts) {
    INDArray data = Nd4j.zeros(insts.numInstances(), insts.numAttributes() - 1);
    INDArray outcomes = Nd4j.zeros(insts.numInstances(), insts.numClasses());

    for (int i = 0; i < insts.numInstances(); i++) {
        double[] independent = new double[insts.numAttributes() - 1];
        double[] dependent = new double[insts.numClasses()];
        Instance current = insts.instance(i);
        for (int j = 0; j < current.numValues(); j++) {
            int index = current.index(j);
            double value = current.valueSparse(j);

            if (index < insts.classIndex()) {
                independent[index] = value;
            } else if (index > insts.classIndex()) {
                // Shift by -1, since the class is left out from the feature matrix
                // and put into a separate outcomes matrix
                independent[index - 1] = value;
            }
        }

        // Set class values
        if (insts.numClasses() > 1) {
            // Classification
            final int oneHotIdx = (int) current.classValue();
            dependent[oneHotIdx] = 1.0;
        } else {
            // Regression (currently only single class)
            dependent[0] = current.classValue();
        }

        INDArray row = Nd4j.create(independent);
        data.putRow(i, row);
        outcomes.putRow(i, Nd4j.create(dependent));
    }
    return new DataSet(data, outcomes);
}
Example 15
Source File: Ridor.java From tsml with GNU General Public License v3.0 | 4 votes

/**
 * Builds a ripple-down manner rule learner.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    Instances data = new Instances(instances);
    data.deleteWithMissingClass();

    int numCl = data.numClasses();
    m_Root = new Ridor_node();
    m_Class = instances.classAttribute(); // The original class label
    int index = data.classIndex();
    m_Cover = data.sumOfWeights();
    m_Random = new Random(m_Seed);

    /* Create a binary attribute */
    FastVector binary_values = new FastVector(2);
    binary_values.addElement("otherClasses");
    binary_values.addElement("defClass");
    Attribute attr = new Attribute("newClass", binary_values);
    data.insertAttributeAt(attr, index);
    data.setClassIndex(index); // The new class label

    /* Partition the data into bags according to their original class values */
    Instances[] dataByClass = new Instances[numCl];
    for (int i = 0; i < numCl; i++)
        dataByClass[i] = new Instances(data, data.numInstances()); // Empty bags
    for (int i = 0; i < data.numInstances(); i++) { // Partitioning
        Instance inst = data.instance(i);
        inst.setClassValue(0); // Set new class value to be 0
        dataByClass[(int) inst.value(index+1)].add(inst);
    }

    for (int i = 0; i < numCl; i++)
        dataByClass[i].deleteAttributeAt(index+1); // Delete original class

    m_Root.findRules(dataByClass, 0);
}
Example 16
Source File: FastShapelets.java From tsml with GNU General Public License v3.0 | 4 votes

public void train(Instances data, int R, int top_k) {
    int sax_max_len, sax_len, w;
    int max_len = data.numAttributes() - 1, min_len = 10, step = 1; //consider whole search space.
    double percent_mask;
    Shapelet sh;

    rand = new Random(seed);
    numClass = data.numClasses();
    numObj = data.numInstances();

    sax_max_len = 15;
    percent_mask = 0.25;
    //R = 10;
    //top_k = 10;

    readTrainData(data);

    //initialise our data structures.
    nodeObjList = new ArrayList<>();
    finalSh = new ArrayList<>();
    uSAXMap = new HashMap<>();
    scoreList = new ArrayList<>();
    classifyList = new ArrayList<>();

    /// Find Shapelet
    for (int node_id = 1; (node_id == 1) || (node_id < nodeObjList.size()); node_id++) {
        Shapelet bsf_sh = new Shapelet();

        if (node_id <= 1) {
            setCurData(node_id);
        } else if (classifyList.get(node_id) == -1) {
            /// non-leaf node (-1:body node, -2:unused node)
            setCurData(node_id);
        } else {
            continue;
        }

        //3 to series length.
        for (subseqLength = min_len; subseqLength <= max_len; subseqLength += step) {
            /// Shapelet cannot be too short, e.g. len=1.
            if (subseqLength < SH_MIN_LEN) {
                continue;
            }

            sax_len = sax_max_len;
            /// Make w and sax_len both integer
            w = (int) Math.ceil(1.0 * subseqLength / sax_len);
            sax_len = (int) Math.ceil(1.0 * subseqLength / w);

            createSAXList(subseqLength, sax_len, w);
            randomProjection(R, percent_mask, sax_len);
            scoreAllSAX(R);

            sh = findBestSAX(top_k);
            if (bsf_sh.lessThan(sh)) {
                bsf_sh = sh;
            }

            uSAXMap.clear();
            scoreList.clear();
        }

        if (bsf_sh.len > 0) {
            double[] query = new double[bsf_sh.len];
            for (int i = 0; i < bsf_sh.len; i++) {
                query[i] = this.data.get(bsf_sh.obj).get(bsf_sh.pos + i);
            }
            bsf_sh.setTS(query);
            finalSh.add(bsf_sh);
            /// post-processing: create tree
            setNextNodeObj(node_id, bsf_sh);
        }
    }
}
Example 17
Source File: Sampling.java From tsml with GNU General Public License v3.0 | 4 votes

/**
 * Reorder the data by compactness of each class using Euclidean distance
 *
 * @param data the dataset to reorder
 * @return the reordered dataset
 */
public static Instances orderByCompactClass(Instances data) {
    Instances newData = new Instances(data, data.numInstances());

    // get the number of classes in the data
    int nbClass = data.numClasses();
    int[] instancePerClass = new int[nbClass];
    int[] labels = new int[nbClass];
    int[] classIndex = new int[nbClass];
    double[] compactness = new double[nbClass];

    // sort the data based on its class attribute
    data.sort(data.classAttribute());
    int start = 0;

    // get the number of instances per class in the data
    for (int i = 0; i < nbClass; i++) {
        instancePerClass[i] = data.attributeStats(data.classIndex()).nominalCounts[i];
        labels[i] = i;
        if (i > 0)
            classIndex[i] = classIndex[i-1] + instancePerClass[i-1];
        int end = start + instancePerClass[i];
        int counter = 0;
        double[][] dataPerClass = new double[instancePerClass[i]][data.numAttributes()-1];
        for (int j = start; j < end; j++) {
            dataPerClass[counter++] = data.instance(j).toDoubleArray();
        }
        double[] mean = arithmeticMean(dataPerClass);
        double d = 0;
        for (int j = 0; j < instancePerClass[i]; j++) {
            double temp = euclideanDistance(mean, dataPerClass[j]);
            temp *= temp;
            temp -= (mean[0] - dataPerClass[j][0]) * (mean[0] - dataPerClass[j][0]);
            d += temp;
        }
        compactness[i] = d / instancePerClass[i];
        start = end;
    }

    QuickSort.sort(compactness, labels);

    for (int i = nbClass-1; i >= 0; i--) {
        for (int j = 0; j < instancePerClass[labels[i]]; j++) {
            newData.add(data.instance(classIndex[labels[i]] + j));
        }
    }
    return newData;
}
Example 18
Source File: BFTree.java From tsml with GNU General Public License v3.0 | 4 votes

/**
 * Generate successor nodes for a node and put them into BestFirstElements
 * according to gini gain or information gain in a descending order.
 *
 * @param BestFirstElements list to store BestFirst nodes
 * @param data training instances
 * @param subsetSortedIndices sorted indices of instances of successor nodes
 * @param subsetWeights weights of instances of successor nodes
 * @param dists class distributions of successor nodes
 * @param att attribute used to split the node
 * @param useHeuristic if use heuristic search for nominal attributes in multi-class problem
 * @param useGini if use Gini index as splitting criterion
 * @throws Exception if something goes wrong
 */
protected void makeSuccessors(FastVector BestFirstElements, Instances data,
    int[][][] subsetSortedIndices, double[][][] subsetWeights,
    double[][][] dists, Attribute att, boolean useHeuristic, boolean useGini) throws Exception {

    m_Successors = new BFTree[2];

    for (int i = 0; i < 2; i++) {
        m_Successors[i] = new BFTree();
        m_Successors[i].m_isLeaf = true;

        // class probability and distribution for this successor node
        m_Successors[i].m_ClassProbs = new double[data.numClasses()];
        m_Successors[i].m_Distribution = new double[data.numClasses()];
        System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_ClassProbs, 0, m_Successors[i].m_ClassProbs.length);
        System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_Distribution, 0, m_Successors[i].m_Distribution.length);
        if (Utils.sum(m_Successors[i].m_ClassProbs) != 0)
            Utils.normalize(m_Successors[i].m_ClassProbs);

        // split information for this successor node
        double[][] props = new double[data.numAttributes()][2];
        double[][][] subDists = new double[data.numAttributes()][2][data.numClasses()];
        double[][] totalSubsetWeights = new double[data.numAttributes()][2];
        FastVector splitInfo = m_Successors[i].computeSplitInfo(m_Successors[i], data,
            subsetSortedIndices[i], subsetWeights[i], subDists, props,
            totalSubsetWeights, useHeuristic, useGini);

        // branch proportion for this successor node
        int splitIndex = ((Attribute) splitInfo.elementAt(1)).index();
        m_Successors[i].m_Props = new double[2];
        System.arraycopy(props[splitIndex], 0, m_Successors[i].m_Props, 0, m_Successors[i].m_Props.length);

        // sorted indices and weights of each attribute for this successor node
        m_Successors[i].m_SortedIndices = new int[data.numAttributes()][0];
        m_Successors[i].m_Weights = new double[data.numAttributes()][0];
        for (int j = 0; j < m_Successors[i].m_SortedIndices.length; j++) {
            m_Successors[i].m_SortedIndices[j] = subsetSortedIndices[i][j];
            m_Successors[i].m_Weights[j] = subsetWeights[i][j];
        }

        // distribution of each attribute for this successor node
        m_Successors[i].m_Dists = new double[data.numAttributes()][2][data.numClasses()];
        for (int j = 0; j < subDists.length; j++) {
            m_Successors[i].m_Dists[j] = subDists[j];
        }

        // total weights for this successor node
        m_Successors[i].m_TotalWeight = Utils.sum(totalSubsetWeights[splitIndex]);

        // insert this successor node into BestFirstElements according to gini gain
        // or information gain, descendingly
        if (BestFirstElements.size() == 0) {
            BestFirstElements.addElement(splitInfo);
        } else {
            double gGain = ((Double) (splitInfo.elementAt(3))).doubleValue();
            int vectorSize = BestFirstElements.size();
            FastVector lastNode = (FastVector) BestFirstElements.elementAt(vectorSize - 1);

            // If gini gain is less than that of last node in FastVector
            if (gGain < ((Double) (lastNode.elementAt(3))).doubleValue()) {
                BestFirstElements.insertElementAt(splitInfo, vectorSize);
            } else {
                for (int j = 0; j < vectorSize; j++) {
                    FastVector node = (FastVector) BestFirstElements.elementAt(j);
                    double nodeGain = ((Double) (node.elementAt(3))).doubleValue();
                    if (gGain >= nodeGain) {
                        BestFirstElements.insertElementAt(splitInfo, j);
                        break;
                    }
                }
            }
        }
    }
}
Example 19
Source File: WindowSearcher.java From tsml with GNU General Public License v3.0 | 4 votes

/**
 * This is similar to buildClassifier but it is an estimate.
 * This is used for large datasets where it takes very long to run.
 * The main purpose of this is to get the run time and not actually search for the best window.
 * We use this to draw Figure 1 of our SDM18 paper.
 *
 * @param data the training data
 * @param estimate the number of instances to evaluate
 * @throws Exception
 */
public void buildClassifierEstimate(Instances data, int estimate) throws Exception {
    // Initialise training dataset
    Attribute classAttribute = data.classAttribute();

    classedData = new HashMap<>();
    classedDataIndices = new HashMap<>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
        classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
    }

    train = new SymbolicSequence[data.numInstances()];
    classMap = new String[train.length];
    maxLength = 0;
    for (int i = 0; i < train.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        maxLength = Math.max(maxLength, sequence.length);
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        train[i] = new SymbolicSequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(train[i]);
        classedDataIndices.get(clas).add(i);
    }

    warpingMatrix = new double[maxLength][maxLength];
    U = new double[maxLength];
    L = new double[maxLength];

    maxWindow = Math.round(1 * maxLength);
    searchResults = new String[maxWindow + 1];
    nns = new int[maxWindow + 1][train.length];
    dist = new double[maxWindow + 1][train.length];

    int[] nErrors = new int[maxWindow + 1];
    double[] score = new double[maxWindow + 1];
    double bestScore = Double.MAX_VALUE;
    double minD;
    bestWarpingWindow = -1;

    // Start searching for the best window.
    // Only loop through a given size of the dataset, but still search for NN from the whole train
    // for every sequence in train, we find NN for all window
    // then in the end, update the best score
    for (int i = 0; i < estimate; i++) {
        SymbolicSequence testSeq = train[i];

        for (int w = 0; w <= maxWindow; w++) {
            testSeq.LB_KeoghFillUL(w, U, L);

            minD = Double.MAX_VALUE;
            String classValue = null;
            for (int j = 0; j < train.length; j++) {
                if (i == j) continue;
                SymbolicSequence trainSeq = train[j];

                if (SymbolicSequence.LB_KeoghPreFilled(trainSeq, U, L) < minD) {
                    double tmpD = testSeq.DTW(trainSeq, w, warpingMatrix);
                    if (tmpD < minD) {
                        minD = tmpD;
                        classValue = classMap[j];
                        nns[w][i] = j;
                    }
                    dist[w][j] = tmpD * tmpD;
                }
            }

            if (classValue == null || !classValue.equals(classMap[i])) {
                nErrors[w]++;
            }
            score[w] = 1.0 * nErrors[w] / train.length;
        }
    }

    for (int w = 0; w < maxWindow; w++) {
        if (score[w] < bestScore) {
            bestScore = score[w];
            bestWarpingWindow = w;
        }
    }

    // Saving best window found
    System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1 - bestScore));
}
Example 20
Source File: RnnSequenceClassifier.java From wekaDeeplearning4j with GNU General Public License v3.0 | 4 votes

/**
 * The method to use when making predictions for test instances.
 *
 * @param insts the instances to get predictions for
 * @return the class probability estimates (if the class is nominal) or the numeric predictions
 *     (if it is numeric)
 * @throws Exception if something goes wrong at prediction time
 */
@Override
public double[][] distributionsForInstances(Instances insts) throws Exception {
    log.info("Calc. dist for {} instances", insts.numInstances());

    // Do we only have a ZeroR model?
    if (zeroR != null) {
        return zeroR.distributionsForInstances(insts);
    }

    // Process input data to have the same filters applied as the training data
    insts = applyFilters(insts);

    // Get predictions
    final DataSetIterator it = getDataSetIterator(insts, CacheMode.NONE);
    double[][] preds = new double[insts.numInstances()][insts.numClasses()];

    if (it.resetSupported()) {
        it.reset();
    }

    int offset = 0;
    boolean next = it.hasNext();

    // Get predictions batch-wise
    while (next) {
        final DataSet ds = Utils.getNext(it);
        final INDArray features = ds.getFeatures();
        final INDArray labelsMask = ds.getLabelsMaskArray();
        INDArray lastTimeStepIndices;
        if (labelsMask != null) {
            lastTimeStepIndices = Nd4j.argMax(labelsMask, 1);
        } else {
            lastTimeStepIndices = Nd4j.zeros(features.size(0), 1);
        }
        INDArray predBatch = model.outputSingle(features);
        int currentBatchSize = (int) predBatch.size(0);
        for (int i = 0; i < currentBatchSize; i++) {
            int thisTimeSeriesLastIndex = lastTimeStepIndices.getInt(i);
            INDArray thisExampleProbabilities = predBatch.get(
                NDArrayIndex.point(i),
                NDArrayIndex.all(),
                NDArrayIndex.point(thisTimeSeriesLastIndex));
            for (int j = 0; j < insts.numClasses(); j++) {
                preds[i + offset][j] = thisExampleProbabilities.getDouble(j);
            }
        }

        offset += currentBatchSize; // add batchsize as offset
        boolean iteratorHasInstancesLeft = offset < insts.numInstances();
        next = it.hasNext() || iteratorHasInstancesLeft;
    }

    // Fix classes
    for (int i = 0; i < preds.length; i++) {
        if (preds[i].length > 1) {
            weka.core.Utils.normalize(preds[i]);
        } else {
            // Rescale numeric classes with the computed coefficients in the initialization phase
            preds[i][0] = preds[i][0] * x1 + x0;
        }
    }
    return preds;
}