weka.core.Instance Java Examples
The following examples show how to use
weka.core.Instance.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RemovePercentage.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Feeds a single instance into the filter.
 * <p>
 * While the first batch is still being collected the instance is only
 * buffered; once the first batch is done it is pushed straight onto the
 * output queue.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be collected with output()
 * @throws IllegalStateException if no input format has been set
 */
public boolean input(Instance instance) {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  // The start of a new batch discards whatever is left in the output queue.
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  final boolean passThrough = isFirstBatchDone();
  if (!passThrough) {
    bufferInput(instance);
    return false;
  }

  push(instance);
  return true;
}
Example #2
Source File: ArffMetaDataLabelGenerator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/** * Default constructor which sets the metaData * * @param metaData Meta data with mapping: filename to label * @param path Directory path */ public ArffMetaDataLabelGenerator(Instances metaData, String path) { // If this path is absolute set it as basepath if (new File(path).isAbsolute()) { this.basePath = path; } else { String currentPath = Paths.get(System.getProperty("user.dir")).toString(); this.basePath = Paths.get(currentPath, path).toString(); } // Fill mapping from image path to fileLabelMap = new TreeMap<>(); paths = new ArrayList<>(); labels = new ArrayList<>(); for (Instance inst : metaData) { String fileName = inst.stringValue(0); String label = inst.stringValue(1); String absPath = Paths.get(this.basePath, fileName).toFile().getAbsolutePath(); paths.add(absPath); labels.add(label); fileLabelMap.put(absPath, label); } }
Example #3
Source File: ClassifierWrapper.java From orbit-image-analysis with GNU General Public License v3.0 | 6 votes |
/** * This method is not thread-safe! Use makeCopy to get an instance for each thread. * In case of clustering, the class-labels will be ordered with respect to the priors (e.g. background (high prior) will be class 0). * * @param instance * @return * @throws Exception */ public double classifyInstance(Instance instance) throws Exception { double c = 0d; if (wrapperType == WRAPPERTYPE_CLASSIFIER) { c = classifier.classifyInstance(instance); } else { c = clusterer.clusterInstance(instance); //double p = clusterer.distributionForInstance(instance)[(int)c]; if (clusterer instanceof DensityBasedClusterer) { c = getClusterOrder()[(getClusterOrder().length - 1) - (int) c]; } //if (p<0.95d) c = 0; } if (binaryClassification >= 0) { if ((int) c == binaryClassification) c = 1; else c = 0; } return c; }
Example #4
Source File: LbLcss.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Lower bound for the LCSS distance with early abandoning.
 * <p>
 * Counts the positions at which the candidate lies inside the envelope
 * [L[i], U[i]] and turns that count into a distance of
 * 1 - count / length.
 *
 * @param c candidate sequence (the last attribute is excluded)
 * @param U upper envelope
 * @param L lower envelope
 * @param cutOffValue cutoff value for early abandoning
 * @return lower bound distance
 */
public static double distance(final Instance c, final double[] U, final double[] L, final double cutOffValue) {
  final int length = Math.min(U.length, c.numAttributes() - 1);
  final double ub = (1.0 - cutOffValue) * length;

  double matches = 0;
  int pos = 0;
  while (pos < length) {
    final double point = c.value(pos);
    if (point <= U[pos] && point >= L[pos]) {
      matches++;
      // NOTE(review): this returns 1 as soon as a match is found while the
      // count is still at or below the bound; the direction of the comparison
      // looks unusual for an early-abandon test -- confirm against the reference.
      if (matches <= ub) {
        return 1;
      }
    }
    pos++;
  }

  return 1 - matches / length;
}
Example #5
Source File: Cobweb.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Temporarily adds a new instance to each of this nodes children * in turn and computes the category utility. * * @param newInstance the new instance to evaluate * @return an array of category utility values---the result of considering * each child in turn as a host for the new instance * @throws Exception if an error occurs */ private double[] cuScoresForChildren(Instance newInstance) throws Exception { // look for a host in existing children double[] categoryUtils = new double [m_children.size()]; // look for a home for this instance in the existing children for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode) m_children.elementAt(i); // tentitively add the new instance to this child temp.updateStats(newInstance, false); categoryUtils[i] = categoryUtility(); // remove the new instance from this child temp.updateStats(newInstance, true); } return categoryUtils; }
Example #6
Source File: LbErp.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Builds the upper and lower envelope for LB Keogh with the ERP modification.
 * <p>
 * For every position the envelope is the max/min over the warping window
 * around that position, with the gap value g always included in both.
 *
 * @param a time series (the last attribute is excluded)
 * @param g g parameter value
 * @param bandSize size of the warping window, as a fraction of the series length
 * @param U upper envelope (written by this method)
 * @param L lower envelope (written by this method)
 */
public static void fillUL(final Instance a, final double g, final double bandSize, final double[] U, final double[] L) {
  final int length = a.numAttributes() - 1;
  final int r = (int) Math.ceil(length * bandSize);

  for (int centre = 0; centre < length; centre++) {
    // Clamp the window to the valid index range.
    final int from = Math.max(0, centre - r);
    final int to = Math.min(length - 1, centre + r);

    double lowest = g;
    double highest = g;
    for (int k = from; k <= to; k++) {
      final double sample = a.value(k);
      lowest = Math.min(lowest, sample);
      highest = Math.max(highest, sample);
    }

    U[centre] = highest;
    L[centre] = lowest;
  }
}
Example #7
Source File: ClassifierSplitModel.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Gets the Laplace-corrected class probability for an instance.
 * <p>
 * If a subset is given (theSubset > -1) the probability of that subset is
 * returned. Otherwise the instance's subset weights are used to form a
 * weighted sum over all subsets, falling back to the overall distribution
 * when no weights are available.
 *
 * @param classIndex index of the class
 * @param instance the instance
 * @param theSubset the subset index, or a value <= -1 if unknown
 * @return the class probability
 * @exception Exception if something goes wrong
 */
public double classProbLaplace(int classIndex, Instance instance, int theSubset) throws Exception {
  if (theSubset > -1) {
    return m_distribution.laplaceProb(classIndex, theSubset);
  }

  final double[] subsetWeights = weights(instance);
  if (subsetWeights == null) {
    return m_distribution.laplaceProb(classIndex);
  }

  double weighted = 0;
  for (int subset = 0; subset < subsetWeights.length; subset++) {
    weighted += subsetWeights[subset] * m_distribution.laplaceProb(classIndex, subset);
  }
  return weighted;
}
Example #8
Source File: AbstractVectorClusterer.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Z-normalises every attribute except the last one, in place.
 * <p>
 * For each such attribute the mean and standard deviation reported by
 * {@code attributeStats} are stored in {@code attributeMeans} and
 * {@code attributeStdDevs}, and every value is replaced by
 * (value - mean) / stdDev.
 *
 * @param data the data to normalise; modified in place
 * @throws Exception if a class attribute is set and it is not the final attribute
 */
protected void normaliseData(Instances data) throws Exception{
    if (data.classIndex() >= 0 && data.classIndex() != data.numAttributes()-1){
        throw new Exception("Class attribute is available and not the final attribute.");
    }

    final int numToNormalise = data.numAttributes()-1;
    attributeMeans = new double[numToNormalise];
    attributeStdDevs = new double[numToNormalise];

    for (int i = 0; i < numToNormalise; i++){
        // attributeStats scans the whole dataset, so compute it once per
        // attribute instead of once for the mean and again for the stdDev.
        final weka.experiment.Stats numeric = data.attributeStats(i).numericStats;
        attributeMeans[i] = numeric.mean;
        attributeStdDevs[i] = numeric.stdDev;

        // NOTE(review): an attribute with zero standard deviation yields
        // NaN/Infinity here -- confirm whether callers rely on that.
        for (int n = 0; n < data.size(); n++){
            Instance instance = data.get(n);
            instance.setValue(i, (instance.value(i) - attributeMeans[i])/attributeStdDevs[i]);
        }
    }
}
Example #9
Source File: CitationKNN.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Computes the distribution for a given exemplar * * @param bag the exemplar for which distribution is computed * @return the distribution * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance bag) throws Exception { if(m_TrainBags.numInstances() == 0) throw new Exception("No training bags!"); updateNormalization(bag); //build references (R nearest neighbors) countBagReferences(bag); //build citers countBagCiters(bag); return makeDistribution(); }
Example #10
Source File: DatasetLists.java From tsml with GNU General Public License v3.0 | 6 votes |
public static void dataDescriptionDataNotSplit(String[] fileNames, String problemPath){ //Produce summary descriptions //dropboxPath=uciPath; OutFile f=new OutFile(problemPath+"DataDimensions.csv"); f.writeLine("problem,numinstances,numAttributes,numClasses,classDistribution"); try{ for(int i=0;i<fileNames.length;i++){ Instances allData=DatasetLoading.loadDataNullable(problemPath+fileNames[i]+"/"+fileNames[i]); // allData.randomize(new Random()); // OutFile combo=new OutFile(problemPath+tscProblems85[i]+"/"+tscProblems85[i]+".arff"); // combo.writeString(allData.toString()); int[] classCounts=new int[allData.numClasses()]; for(Instance ins: allData) classCounts[(int)(ins.classValue())]++; f.writeString(fileNames[i]+","+allData.numInstances()+","+(allData.numAttributes()-1)+","+allData.numClasses()); for(int c:classCounts) f.writeString(","+(c/(double)allData.numInstances())); f.writeString("\n"); } }catch(Exception e){ System.out.println(" ERRROR"+e); } }
Example #11
Source File: LinearRegression.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Calculates the dependent value for a given instance for a given
 * regression model.
 * <p>
 * Coefficients are consumed in order, one per selected non-class attribute;
 * the coefficient following the last one used is added as the intercept.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes an array of flags indicating which attributes
 *     are included in the regression model
 * @param coefficients an array of coefficients for the regression model
 * @return the regression value for the instance
 * @throws Exception if the class attribute of the input instance is not assigned
 */
protected double regressionPrediction(Instance transformedInstance,
                                      boolean[] selectedAttributes,
                                      double[] coefficients) throws Exception {
  double prediction = 0;
  int next = 0;

  for (int att = 0; att < transformedInstance.numAttributes(); att++) {
    if (att == m_ClassIndex || !selectedAttributes[att]) {
      continue;
    }
    prediction += coefficients[next] * transformedInstance.value(att);
    next++;
  }

  // Intercept term.
  prediction += coefficients[next];
  return prediction;
}
Example #12
Source File: AnthOnlineClassifier.java From anthelion with Apache License 2.0 | 6 votes |
/** * Sets the prediction variable within a {@link AnthURL} based on the * learned classifier. As the {@link AnthURL} implements {@link Comparable} * Interface and is included in a {@link PriorityQueue} this effects the * ordering. * * @param aurl * the {@link AnthURL} */ public void classifyUrl(AnthURL aurl) { Instance inst = convert(aurl); if (inst != null) { // good class = 0, bad class = 1 double[] res = learner.getVotesForInstance(inst); classifiedItems++; if (res.length < 2) { unclassifiableItems++; aurl.prediction = 0; } else { aurl.prediction = res[0] - res[1]; } } else { aurl.prediction = 0; } }
Example #13
Source File: DataSetUtils.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Converts a flat CIFAR-10 instance into a 32 x 32 x 3 array.
 * <p>
 * The instance must hold 32 * 32 * 3 values plus one extra attribute. The
 * first 1024 values are read as channel 0, the next 1024 as channel 1 and the
 * following 1024 as channel 2. Within a channel, pixel (i, j) is read from
 * index i * 32 + j, so that each of the 1024 values of a plane is used exactly
 * once. (The previous index, (i + 1) * j, mapped many pixels to the same value
 * and never read most of the plane.)
 * <p>
 * NOTE(review): this assumes each plane is stored row by row with i as the
 * row index, as in the original CIFAR-10 binary layout -- confirm against the
 * data files in use.
 *
 * @param instance the CIFAR-10 instance
 * @return array indexed as [i, j, channel]
 * @throws IllegalArgumentException if the instance does not have 32 * 32 * 3 + 1 values
 */
public static INDArray cifar10InstanceToMatrix(final Instance instance) {
	final int side = 32;
	final int channels = 3;
	final int planeSize = side * side;

	final double[] imageValues = instance.toDoubleArray();
	if (imageValues.length != (planeSize * channels + 1)) {
		throw new IllegalArgumentException("Cifar 10 instances must have the dimensionality of 32 x 32 x 3!");
	}

	final INDArray result = Nd4j.create(side, side, channels);
	for (int i = 0; i < side; i++) {
		for (int j = 0; j < side; j++) {
			final int pixel = i * side + j;
			for (int channel = 0; channel < channels; channel++) {
				result.putScalar(new int[] { i, j, channel }, imageValues[channel * planeSize + pixel]);
			}
		}
	}
	return result;
}
Example #14
Source File: PolyKernel.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * * @param id1 the index of instance 1 * @param id2 the index of instance 2 * @param inst1 the instance 1 object * @return the dot product * @throws Exception if something goes wrong */ protected double evaluate(int id1, int id2, Instance inst1) throws Exception { double result; if (id1 == id2) { result = dotProd(inst1, inst1); } else { result = dotProd(inst1, m_data.instance(id2)); } // Use lower order terms? if (m_lowerOrder) { result += 1.0; } if (m_exponent != 1.0) { result = Math.pow(result, m_exponent); } return result; }
Example #15
Source File: DataProcessing.java From tsml with GNU General Public License v3.0 | 6 votes |
public static void debugFormat(){ // ECGActivities Instances train,test; train=DatasetLoading.loadDataNullable("Z:\\Data\\MultivariateTSCProblems\\ECGActivities\\ECGActivities_TRAIN"); test=DatasetLoading.loadDataNullable("Z:\\Data\\MultivariateTSCProblems\\ECGActivities\\ECGActivities_TEST"); // Instances[] split=InstanceTools.resampleTrainAndTestInstances(train, test, 1); Instances[] split=MultivariateInstanceTools.resampleMultivariateTrainAndTestInstances(train, test, 1); System.out.println("IS it relational ? "+split[0].checkForAttributeType(Attribute.RELATIONAL)); System.out.println("IS it relational ? "+split[0].checkForAttributeType(Attribute.RELATIONAL)); System.out.println("Fold 1 TRAIN num instances "+split[0].numInstances()+" Num atts ="+(split[0].numAttributes()-1)); // System.out.println(split[0]+""); System.out.println("Fold 1 TRAIN instance 1 num dimensions "+split[0].instance(0).relationalValue(0).numInstances()+" series length "+split[0].instance(0).relationalValue(0).numAttributes()); for(Instance ins:split[0]) System.out.println("Fold TRAIN instance num dimensions "+ins.relationalValue(0).numInstances()+" series length "+ins.relationalValue(0).numAttributes()); }
Example #16
Source File: CheckEstimator.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Compares two datasets to see if they differ.
 * <p>
 * Headers, instance counts, attribute values (including missing-ness) and
 * instance weights are compared. The weight is checked after each attribute
 * of an instance.
 *
 * @param data1 one set of instances
 * @param data2 the other set of instances
 * @throws Exception if the datasets differ
 */
protected void compareDatasets(Instances data1, Instances data2) throws Exception {
  if (!data2.equalHeaders(data1)) {
    throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
  }
  if (data2.numInstances() != data1.numInstances()) {
    throw new Exception("number of instances has changed");
  }

  for (int row = 0; row < data2.numInstances(); row++) {
    Instance orig = data1.instance(row);
    Instance copy = data2.instance(row);

    for (int att = 0; att < orig.numAttributes(); att++) {
      // A missing value must stay missing; a present value must be unchanged.
      final boolean valueChanged = orig.isMissing(att)
          ? !copy.isMissing(att)
          : orig.value(att) != copy.value(att);
      if (valueChanged) {
        throw new Exception("instances have changed");
      }

      if (orig.weight() != copy.weight()) {
        throw new Exception("instance weights have changed");
      }
    }
  }
}
Example #17
Source File: GlobalScoreSearchAlgorithm.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * LeaveOneOutCV returns the accuracy calculated using Leave One Out
 * cross validation. The dataset used is m_Instances associated with
 * the Bayes Network.
 * <p>
 * For every instance the weight is negated and the classifier updated with
 * it, the accuracy contribution and the (negated) weight are accumulated,
 * and then the weight is negated back and the classifier updated again.
 * The order of these steps matters.
 *
 * @param bayesNet : Bayes Network containing structure to evaluate
 * @return accumulated accuracy divided by accumulated weight
 * @throws Exception passed on by updateClassifier
 */
public double leaveOneOutCV(BayesNet bayesNet) throws Exception {
  m_BayesNet = bayesNet;

  Instances instances = bayesNet.m_Instances;
  bayesNet.estimateCPTs();

  double accuracySum = 0.0;
  double weightSum = 0.0;

  for (int idx = 0; idx < instances.numInstances(); idx++) {
    Instance current = instances.instance(idx);

    // Flip the sign of the weight and update the network with it.
    current.setWeight(-current.weight());
    bayesNet.updateClassifier(current);

    accuracySum += accuracyIncrease(current);
    weightSum += current.weight();

    // Restore the original weight and update the network again.
    current.setWeight(-current.weight());
    bayesNet.updateClassifier(current);
  }

  return accuracySum / weightSum;
}
Example #18
Source File: SAXVSM.java From tsml with GNU General Public License v3.0 | 6 votes |
@Override public double[] distributionForInstance(Instance instance) throws Exception { int numClasses = corpus.numInstances(); double[] termFreqs = bop.bagToArray(bop.buildBag(instance)); //find similarity to each class double[] similarities = new double[numClasses]; double sum = 0.0; for (int i = 0; i < numClasses; ++i) { similarities[i] = cosineSimilarity(corpus.get(i).toDoubleArray(), termFreqs, termFreqs.length); sum+=similarities[i]; } //return as a set of probabilities if (sum != 0) for (int i = 0; i < numClasses; ++i) similarities[i] /= sum; return similarities; }
Example #19
Source File: Analyzer.java From NLIWOD with GNU Affero General Public License v3.0 | 5 votes |
/** * Analyzes the question and extracts all features that were set for this Analyzer. * @param q question string * @return feature vector for the input question */ public Instance analyze(String q) { Instance tmpInstance = new DenseInstance(fvWekaAttributes.size()); for (IAnalyzer analyzer : analyzers) { //special case for PartOfSpeechTags, need to set 36 attributes if(analyzer instanceof PartOfSpeechTags) { analyzePOS(tmpInstance, (PartOfSpeechTags) analyzer, q); continue; } //special case for Dependencies, need to set 18 attributes if(analyzer instanceof Dependencies) { analyzeDeps(tmpInstance, (Dependencies) analyzer, q); continue; } Attribute attribute = analyzer.getAttribute(); if (attribute.isNumeric()) { tmpInstance.setValue(attribute, (double) analyzer.analyze(q)); } else if (attribute.isNominal() || attribute.isString()) { String value = (String) analyzer.analyze(q); tmpInstance.setValue(attribute,value); tmpInstance.setDataset(null); } } return tmpInstance; }
Example #20
Source File: ManhattanDataObject.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Constructs a new DataObject. The original instance is kept as instance-variable * @param originalInstance the original instance */ public ManhattanDataObject(Instance originalInstance, String key, Database database) { this.database = database; this.key = key; instance = originalInstance; clusterID = DataObject.UNCLASSIFIED; processed = false; c_dist = DataObject.UNDEFINED; r_dist = DataObject.UNDEFINED; }
Example #21
Source File: NominalToBinary.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Converts a single instance, dispatching on the type of the class
 * attribute of the input format: the nominal conversion is used for a
 * nominal class and the numeric conversion otherwise.
 *
 * @param inst the instance to convert
 */
private void convertInstance(Instance inst) {
  final boolean nominalClass = getInputFormat().classAttribute().isNominal();
  if (!nominalClass) {
    convertInstanceNumeric(inst);
    return;
  }
  convertInstanceNominal(inst);
}
Example #22
Source File: RDG1.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Resets the class values of all instances using voting.
 * <p>
 * The dataset is processed like a queue: the first instance is reclassified,
 * the result is appended to the end, and the first instance is removed. This
 * is repeated once per instance.
 *
 * @param dataset the dataset to work on
 * @return the changed instances (the same dataset object)
 * @throws Exception if something goes wrong
 */
private Instances voteDataset(Instances dataset) throws Exception {
  for (int processed = 0; processed < dataset.numInstances(); processed++) {
    Instance head = dataset.firstInstance();
    Instance revoted = votedReclassifyExample(head);

    // Append the result, then drop the instance it replaces.
    dataset.add(revoted);
    dataset.delete(0);
  }
  return dataset;
}
Example #23
Source File: SpecPragmaticCreateDataset_posteriori_10.java From TableDisentangler with GNU General Public License v3.0 | 5 votes |
public void ProcessTables(int[] table_array) { DataBase(); int execCount = 0; try { String SQL = "SELECT * from ArtTable where HasXML='yes' and idTable in "+Arrays.toString(table_array); SQL = SQL.replace("[", "(").replace("]", ")"); Statement st = conn.createStatement(); Instances instances = CreateInstances(); FastVector fvWekaAttributes = new FastVector(48); rs = st.executeQuery(SQL); while (rs.next()) { Instance iExample = processTable(rs.getInt(1)); instances.add(iExample); execCount ++; if(execCount>10000){ conn.close(); DataBase(); execCount = 0; } } System.out.println(instances.toString()); ArffSaver saver = new ArffSaver(); saver.setInstances(instances); saver.setFile(new File("spptest10.arff")); //saver.setDestination(new File("./data/test.arff")); // **not** necessary in 3.5.4 and later saver.writeBatch(); } catch (Exception ex) { ex.printStackTrace(); } }
Example #24
Source File: FeatureSpace.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Checks whether an instance lies inside this feature space, i.e. whether
 * the i-th feature domain contains the instance's i-th value for every
 * domain. Every domain is queried, even after a mismatch has been found.
 *
 * @param instance the instance to check
 * @return true if all domains contain their corresponding value
 */
public boolean containsInstance(final Instance instance) {
	boolean allContained = true;
	final int domainCount = this.featureDomains.size();
	for (int idx = 0; idx < domainCount; idx++) {
		FeatureDomain domain = this.featureDomains.get(idx);
		final boolean inside = domain.contains(instance.value(idx));
		if (!inside) {
			allContained = false;
		}
	}
	return allContained;
}
Example #25
Source File: ClassOrder.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Input an instance for filtering. Ordinarily the instance is processed * and made available for output immediately. Some filters require all * instances be read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be * collected with output(). * @throws IllegalStateException if no input format has been defined. */ public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } // In case some one use this routine in testing, // although he/she should not do so if(m_Converter != null){ Instance datum = (Instance)instance.copy(); if (!datum.isMissing(m_ClassAttribute)){ datum.setClassValue((double)m_Converter[(int)datum.classValue()]); } push(datum); return true; } if (!instance.isMissing(m_ClassAttribute)) { m_ClassCounts[(int)instance.classValue()] += instance.weight(); } bufferInput(instance); return false; }
Example #26
Source File: AbstractDensityBasedClusterer.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes the log density for a given instance.
 * <p>
 * The log joint densities are combined as max + log(sum(exp(a_i - max))),
 * i.e. the largest term is factored out before exponentiating.
 *
 * @param instance the instance to compute the density for
 * @return the log density
 * @exception Exception if the density could not be computed successfully
 */
public double logDensityForInstance(Instance instance) throws Exception {
  final double[] logJoint = logJointDensitiesForInstance(instance);
  final double peak = logJoint[Utils.maxIndex(logJoint)];

  double scaledSum = 0.0;
  for (double term : logJoint) {
    scaledSum += Math.exp(term - peak);
  }

  return peak + Math.log(scaledSum);
}
Example #27
Source File: RandomBinaryClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/**
 * Returns a random vote: one of the two classes gets 1 and the other 0,
 * chosen by the next boolean of the random generator. The instance itself
 * is not inspected.
 *
 * @param inst the instance (ignored)
 * @return a two-element vote array, either {0, 1} or {1, 0}
 */
@Override
public double[] getVotesForInstance(Instance inst) {
	final boolean voteForSecond = rnd.nextBoolean();
	return voteForSecond ? new double[] { 0, 1 } : new double[] { 1, 0 };
}
Example #28
Source File: Classifier.java From KEEL with GNU General Public License v3.0 | 5 votes |
/**
 * Classifies the given test instance. The instance has to belong to a
 * dataset when it's being classified. Note that a classifier MUST
 * implement either this or distributionForInstance().
 * <p>
 * For a nominal class the index of the first strictly largest positive
 * probability is returned; for a numeric class the first element of the
 * distribution is returned.
 *
 * @param instance the instance to be classified
 * @return the predicted most likely class for the instance or
 *     Instance.missingValue() if no prediction is made
 * @exception Exception if an error occurred during the prediction
 */
public double classifyInstance(Instance instance) throws Exception {
  final double[] dist = distributionForInstance(instance);
  if (dist == null) {
    throw new Exception("Null distribution predicted");
  }

  switch (instance.classAttribute().type()) {
    case AttributeWeka.NOMINAL: {
      int best = 0;
      double bestProb = 0;
      for (int cls = 0; cls < dist.length; cls++) {
        if (dist[cls] > bestProb) {
          best = cls;
          bestProb = dist[cls];
        }
      }
      // No class has positive probability: no prediction.
      if (bestProb <= 0) {
        return Instance.missingValue();
      }
      return best;
    }
    case AttributeWeka.NUMERIC:
      return dist[0];
    default:
      return Instance.missingValue();
  }
}
Example #29
Source File: Filter.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Returns the instance at the head of the output queue without removing it.
 *
 * @return the instance returned by peeking the output queue, or null if
 *     the queue is empty
 * @throws NullPointerException if no output format has been defined
 */
public Instance outputPeek() {
  if (m_OutputFormat == null) {
    throw new NullPointerException("No output instance format defined");
  }
  return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.peek();
}
Example #30
Source File: LMTNode.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Returns the class probabilities for an instance given by the logistic model tree. * @param instance the instance * @return the array of probabilities */ public double[] distributionForInstance(Instance instance) throws Exception { double[] probs; if (m_isLeaf) { //leaf: use logistic model probs = modelDistributionForInstance(instance); } else { //sort into appropiate child node int branch = m_localModel.whichSubset(instance); probs = m_sons[branch].distributionForInstance(instance); } return probs; }