weka.core.SparseInstance Java Examples
The following examples show how to use
weka.core.SparseInstance.
You can vote up the examples you like or vote down the ones you don't like,
and navigate to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: Model.java From AIDR with GNU Affero General Public License v3.0 | 6 votes |
Instance wordsToInstance(WordSet words) { Instance item = new SparseInstance( attributeSpecification.numAttributes()); item.setDataset(attributeSpecification); // Words for (String word : words.getWords()) { Attribute attribute = attributeSpecification.attribute(word); if (attribute != null) { item.setValue(attribute, 1); } } item.replaceMissingValues(missingVal); return item; }
Example #2
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 6 votes |
private void setLexiconInstances(){ ArrayList<Attribute> atts = new ArrayList<Attribute>(6); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("verb")); atts.add(new Attribute("noun")); atts.add(new Attribute("adj")); atts.add(new Attribute("adv")); atts.add(new Attribute("wordnet")); atts.add(new Attribute("polarity")); atts.add(new Attribute("sentimentClassAttribute",classVal)); Instances textRaw = new Instances("TextInstances",atts,0); double[] vals = lp.getProcessed(tweet, tagger); textRaw.add(new SparseInstance(1.0, vals)); lexicon_instances = new Instances(textRaw); }
Example #3
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**Instantiates the complex-based Instances*/ private String getComplexInstances(String processed_text){ ArrayList<Attribute> atts = new ArrayList<Attribute>(2); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("sentimentClassAttribute",classVal)); atts.add(new Attribute("text",(ArrayList<String>)null)); Instances textRaw = new Instances("TextInstances",atts,0); double[] instanceValue1 = new double[textRaw.numAttributes()]; String tmp_cmplx = cp.getProcessed(processed_text, tagger); instanceValue1[1] = textRaw.attribute(1).addStringValue(tmp_cmplx); textRaw.add(new SparseInstance(1.0, instanceValue1)); complex_instances = new Instances(textRaw); return tmp_cmplx; }
Example #4
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**Instantiates the text-based Instances*/ private String getTextInstances(){ ArrayList<Attribute> atts = new ArrayList<Attribute>(2); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("sentimentClassAttribute",classVal)); atts.add(new Attribute("text",(ArrayList<String>)null)); Instances textRaw = new Instances("TextInstances",atts,0); double[] instanceValue1 = new double[textRaw.numAttributes()]; String tmp_txt = tp.getProcessed(tweet); instanceValue1[1] = textRaw.attribute(1).addStringValue(tmp_txt); textRaw.add(new SparseInstance(1.0, instanceValue1)); text_instances = new Instances(textRaw); return tmp_txt; }
Example #5
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Add instance to cluster */ public void AddInstance(Instance inst) { if (inst instanceof SparseInstance) { // System.out.println("AddSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { AddItem(inst.index(i)); // for(int i=0;i<inst.numAttributes();int++){ // AddItem(inst.index(i)+inst.value(i)); } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { AddItem(i + inst.toString(i)); } } } this.W = this.occ.size(); this.N++; }
Example #6
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Delete instance from cluster */ public void DeleteInstance(Instance inst) { if (inst instanceof SparseInstance) { // System.out.println("DeleteSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { DeleteItem(inst.index(i)); } } else { for (int i = 0; i <= inst.numAttributes() - 1; i++) { if (!inst.isMissing(i)) { DeleteItem(i + inst.toString(i)); } } } this.W = this.occ.size(); this.N--; }
Example #7
Source File: FPGrowth.java From tsml with GNU General Public License v3.0 | 6 votes |
private void processSingleton(Instance current, ArrayList<BinaryItem> singletons) throws Exception { if (current instanceof SparseInstance) { for (int j = 0; j < current.numValues(); j++) { int attIndex = current.index(j); singletons.get(attIndex).increaseFrequency(); } } else { for (int j = 0; j < current.numAttributes(); j++) { if (!current.isMissing(j)) { if (current.attribute(j).numValues() == 1 || current.value(j) == m_positiveIndex - 1) { singletons.get(j).increaseFrequency(); } } } } }
Example #8
Source File: SentimentAnalyser.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**Decides upon a "disagreed" document by applying the learned model based on the last 1,000 "agreed" documents.*/ private String clarifyOnSlidingWindow(String tweet){ String out = ""; double[] instanceValues = new double[train.numAttributes()]; instanceValues[0] = train.attribute(0).addStringValue(tweet); train.add(new SparseInstance(1.0, instanceValues)); try { stwv.setInputFormat(train); Instances newData = Filter.useFilter(train, stwv); Instances train_ins = new Instances(newData, 0, train.size()-1); Instances test_ins = new Instances(newData, train.size()-1, 1); Classifier mnb = (Classifier)new NaiveBayesMultinomial(); mnb.buildClassifier(train_ins); double[] preds = mnb.distributionForInstance(test_ins.get(0)); if (preds[0]>0.5) out = "positive"; else out = "negative"; } catch (Exception e) { e.printStackTrace(); } train.remove(train.numInstances()-1); return out; }
Example #9
Source File: PartitionMembership.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Convert a single instance over. The converted instance is added to * the end of the output queue. * * @param instance the instance to convert * @throws Exception if something goes wrong */ protected void convertInstance(Instance instance) throws Exception { // Make copy and set weight to one Instance cp = (Instance)instance.copy(); cp.setWeight(1.0); // Set up values double [] instanceVals = new double[outputFormatPeek().numAttributes()]; double [] vals = m_partitionGenerator.getMembershipValues(cp); System.arraycopy(vals, 0, instanceVals, 0, vals.length); if (instance.classIndex() >= 0) { instanceVals[instanceVals.length - 1] = instance.classValue(); } push(new SparseInstance(instance.weight(), instanceVals)); }
Example #10
Source File: WekaHierarchicalClustering2.java From Java-Data-Analysis with MIT License | 5 votes |
private static Instances load(double[][] data) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("X")); attributes.add(new Attribute("Y")); Instances dataset = new Instances("Dataset", attributes, M); for (double[] datum : data) { Instance instance = new SparseInstance(2); instance.setValue(0, datum[0]); instance.setValue(1, datum[1]); dataset.add(instance); } return dataset; }
Example #11
Source File: ClusteringTask.java From mzmine3 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of variables (metabolites) * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createVariableWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < this.selectedRawDataFiles.length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < selectedRows.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { DecimalFormat twoDForm = new DecimalFormat("#.##"); double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ())); double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT())); String rowName = "MZ->" + MZ + "/RT->" + RT; values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #12
Source File: ClusteringTask.java From mzmine2 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of samples * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createSampleWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < rawData[0].length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < rawData.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { values[data.numAttributes() - 1] = data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName()); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #13
Source File: ClusteringTask.java From mzmine2 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of variables (metabolites) * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createVariableWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < this.selectedRawDataFiles.length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < selectedRows.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { DecimalFormat twoDForm = new DecimalFormat("#.##"); double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ())); double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT())); String rowName = "MZ->" + MZ + "/RT->" + RT; values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #14
Source File: NutchOnlineClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/** * Converts an {@link AnthURL} into an {@link Instance} which can be handled * by the {@link Classifier}. * * @param url * the {@link AnthURL} which should be transformed/converted. * @return the resulting {@link Instance}. */ private static Instance convert(AnthURL url) { if (url != null) { Instance inst = new SparseInstance(dimension); inst.replaceMissingValues(replaceMissingValues); inst.setDataset(instances); inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem")); inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("domain"), url.uri.getHost()); Set<String> tokens = new HashSet<String>(); tokens.addAll(tokenizer(url.uri.getPath())); tokens.addAll(tokenizer(url.uri.getQuery())); tokens.addAll(tokenizer(url.uri.getFragment())); for (String tok : tokens) { inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1); } return inst; } else { System.out.println("Input AnthURL for convertion into instance was null."); return null; } }
Example #15
Source File: NutchOnlineClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/** * Converts an {@link AnthURL} into an {@link Instance} which can be handled * by the {@link Classifier}. * * @param url * the {@link AnthURL} which should be transformed/converted. * @return the resulting {@link Instance}. */ private static Instance convert(AnthURL url) { if (url != null) { Instance inst = new SparseInstance(dimension); inst.replaceMissingValues(replaceMissingValues); inst.setDataset(instances); inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem")); inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("domain"), url.uri.getHost()); Set<String> tokens = new HashSet<String>(); tokens.addAll(tokenizer(url.uri.getPath())); tokens.addAll(tokenizer(url.uri.getQuery())); tokens.addAll(tokenizer(url.uri.getFragment())); for (String tok : tokens) { inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1); } return inst; } else { System.out.println("Input AnthURL for convertion into instance was null."); return null; } }
Example #16
Source File: SentimentAnalyser.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**Decides upon a "disagreed" document by applying the learned model based on the previously build model.*/ private String clarifyOnModel(String tweet){ String out = ""; // get the text-based representation of the document double[] instanceValues = new double[2]; instanceValues[0] = test.attribute(0).addStringValue(tweet); test.add(new SparseInstance(1.0, instanceValues)); try{ stwv.setInputFormat(test); Instances newData = Filter.useFilter(test, stwv); // re-order attributes so that they are compatible with the training set's ones Instances test_instance = reformatText(newData); // find the polarity of the document based on the previously built model test_instance.setClassIndex(0); double[] preds = multiNB.distributionForInstance(test_instance.get(0)); if (preds[0]>0.5) out = "light positive"; else out = "light negative"; } catch (Exception e){ e.printStackTrace(); } test.remove(0); return out; }
Example #17
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**Initializes the feature-based Instances*/ private void getFeatureInstances(){ ArrayList<Attribute> atts = new ArrayList<Attribute>(2); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("sentimentClassAttribute",classVal)); atts.add(new Attribute("text",(ArrayList<String>)null)); Instances textRaw = new Instances("TextInstances",atts,0); double[] instanceValue1 = new double[textRaw.numAttributes()]; instanceValue1[1] = textRaw.attribute(1).addStringValue(fp.getProcessed(tweet)); textRaw.add(new SparseInstance(1.0, instanceValue1)); feature_instances = new Instances(textRaw); }
Example #18
Source File: KMeans.java From Java-Data-Analysis with MIT License | 5 votes |
private static Instances load(double[][] data) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("X")); attributes.add(new Attribute("Y")); Instances dataset = new Instances("Dataset", attributes, M); for (double[] datum : data) { Instance instance = new SparseInstance(2); instance.setValue(0, datum[0]); instance.setValue(1, datum[1]); dataset.add(instance); } return dataset; }
Example #19
Source File: ClusteringTask.java From mzmine3 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of samples * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createSampleWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < rawData[0].length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < rawData.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { values[data.numAttributes() - 1] = data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName()); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #20
Source File: BagOfPatterns.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public double classifyInstance(Instance instance) throws Exception { //convert to BOP form double[] hist = bop.bagToArray(bop.buildBag(instance)); //stuff into Instance Instances newInsts = new Instances(matrix, 1); //copy attribute data newInsts.add(new SparseInstance(1.0, hist)); return knn.classifyInstance(newInsts.firstInstance()); }
Example #21
Source File: BagOfPatterns.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public double[] distributionForInstance(Instance instance) throws Exception { //convert to BOP form double[] hist = bop.bagToArray(bop.buildBag(instance)); //stuff into Instance Instances newInsts = new Instances(matrix, 1); //copy attribute data newInsts.add(new SparseInstance(1.0, hist)); return knn.distributionForInstance(newInsts.firstInstance()); }
Example #22
Source File: FPGrowth.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Inserts a single instance into the FPTree. * * @param current the instance to insert * @param singletons the singleton item sets * @param tree the tree to insert into * @param minSupport the minimum support threshold */ private void insertInstance(Instance current, ArrayList<BinaryItem> singletons, FPTreeRoot tree, int minSupport) { ArrayList<BinaryItem> transaction = new ArrayList<BinaryItem>(); if (current instanceof SparseInstance) { for (int j = 0; j < current.numValues(); j++) { int attIndex = current.index(j); if (singletons.get(attIndex).getFrequency() >= minSupport) { transaction.add(singletons.get(attIndex)); } } Collections.sort(transaction); tree.addItemSet(transaction, 1); } else { for (int j = 0; j < current.numAttributes(); j++) { if (!current.isMissing(j)) { if (current.attribute(j).numValues() == 1 || current.value(j) == m_positiveIndex - 1) { if (singletons.get(j).getFrequency() >= minSupport) { transaction.add(singletons.get(j)); } } } } Collections.sort(transaction); tree.addItemSet(transaction, 1); } }
Example #23
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Convert a pc transformed instance back to the original space * * @param inst the instance to convert * @return the processed instance * @throws Exception if something goes wrong */ private Instance convertInstanceToOriginal(Instance inst) throws Exception { double[] newVals = null; if (m_hasClass) { newVals = new double[m_numAttribs+1]; } else { newVals = new double[m_numAttribs]; } if (m_hasClass) { // class is always appended as the last attribute newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1); } for (int i = 0; i < m_eTranspose[0].length; i++) { double tempval = 0.0; for (int j = 1; j < m_eTranspose.length; j++) { tempval += (m_eTranspose[j][i] * inst.value(j - 1)); } newVals[i] = tempval; if (!m_center) { newVals[i] *= m_stdDevs[i]; } newVals[i] += m_means[i]; } if (inst instanceof SparseInstance) { return new SparseInstance(inst.weight(), newVals); } else { return new DenseInstance(inst.weight(), newVals); } }
Example #24
Source File: WekaHierarchicalClustering.java From Java-Data-Analysis with MIT License | 5 votes |
private static Instances load(double[][] data) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("X")); attributes.add(new Attribute("Y")); Instances dataset = new Instances("Dataset", attributes, M); for (double[] datum : data) { Instance instance = new SparseInstance(2); instance.setValue(0, datum[0]); instance.setValue(1, datum[1]); dataset.add(instance); } return dataset; }
Example #25
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Transform an instance in original (unormalized) format. Convert back
 * to the original space if requested.
 *
 * The instance is pushed through the same preprocessing pipeline used at
 * training time (replace-missing, nominal-to-binary, optional attribute
 * filter, then standardize or center), projected onto the sorted
 * eigenvectors until the requested variance is covered, and finally either
 * returned in PC space or mapped back via convertInstanceToOriginal.
 *
 * @param instance an instance in the original (unormalized) format
 * @return a transformed instance
 * @throws Exception if instance cant be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not "
            +"built yet");
    }
    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance)instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: header's don't match: "
            +"PrincipalComponents\n"
            + instance.dataset().equalHeadersMsg(m_trainHeader));
    }
    // 1) Replace missing values exactly as during training.
    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();
    /*if (m_normalize) {
      m_normalizeFilter.input(tempInst);
      m_normalizeFilter.batchFinished();
      tempInst = m_normalizeFilter.output();
    }*/
    // 2) Convert nominal attributes to binary indicators.
    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();
    // 3) Optional attribute removal applied at training time.
    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }
    // 4) Standardize, or center only, matching the training configuration.
    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }
    if (m_hasClass) {
        // Class is always the last output attribute.
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }
    // Project onto eigenvectors in decreasing eigenvalue order, stopping once
    // the cumulative variance reaches the configured coverage threshold.
    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative+=m_eigenvalues[m_sortedEigens[i]];
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }
    // Preserve the sparse/dense representation of the input.
    if (!m_transBackToOriginal) {
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new DenseInstance(instance.weight(), newVals);
        }
    } else {
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new DenseInstance(instance.weight(), newVals));
        }
    }
}
Example #26
Source File: SAXVSM.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * If skip = one of <0 ... numInstances-1>, will not include instance at that index into the corpus * Part of leave one out cv, while avoiding unnecessary repeats of the BoP transformation */ private Instances tfxidf(Instances bopData, int skip) { int numClasses = bopData.numClasses(); int numInstances = bopData.numInstances(); int numTerms = bopData.numAttributes()-1; //minus class attribute //initialise class weights double[][] classWeights = new double[numClasses][numTerms]; //build class bags int inst = 0; for (Instance in : bopData) { if (inst++ == skip) //skip 'this' one, for leave-one-out cv continue; int classVal = (int)in.classValue(); for (int j = 0; j < numTerms; ++j) { classWeights[classVal][j] += in.value(j); } } //apply tf x idf for (int i = 0; i < numTerms; ++i) { //for each term double df = 0; //document frequency for (int j = 0; j < numClasses; ++j) //find how many classes (documents) this term appears in if (classWeights[j][i] != 0) ++df; if (df != 0) { //if it appears if (df != numClasses) { //but not in all, apply weighting for (int j = 0; j < numClasses; ++j) if (classWeights[j][i] != 0) classWeights[j][i] = Math.log(1 + classWeights[j][i]) * Math.log(numClasses / df); } else { //appears in all //avoid log calculations //if df == num classes -> idf = log(N/df) = log(1) = 0 for (int j = 0; j < numClasses; ++j) classWeights[j][i] = 0; } } } Instances tfxidfCorpus = new Instances(bopData, numClasses); for (int i = 0; i < numClasses; ++i) tfxidfCorpus.add(new SparseInstance(1.0, classWeights[i])); return tfxidfCorpus; }
Example #27
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Calculate Delta */ public double DeltaAdd(Instance inst, double r) { //System.out.println("DeltaAdd"); int S_new; int W_new; double profit; double profit_new; double deltaprofit; S_new = 0; W_new = occ.size(); if (inst instanceof SparseInstance) { //System.out.println("DeltaAddSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { S_new++; if ((Integer) this.occ.get(inst.index(i)) == null) { W_new++; } } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { S_new++; if ((Integer) this.occ.get(i + inst.toString(i)) == null) { W_new++; } } } } S_new += S; if (N == 0) { deltaprofit = S_new / Math.pow(W_new, r); } else { profit = S * N / Math.pow(W, r); profit_new = S_new * (N + 1) / Math.pow(W_new, r); deltaprofit = profit_new - profit; } return deltaprofit; }
Example #28
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Move instance to best cluster */ public int MoveInstanceToBestCluster(Instance inst) { clusters.get(m_clusterAssignments.get(m_processed_InstanceID)).DeleteInstance(inst); m_clusterAssignments.set(m_processed_InstanceID, -1); double delta; double deltamax; int clustermax = -1; int tempS = 0; int tempW = 0; if (inst instanceof SparseInstance) { for (int i = 0; i < inst.numValues(); i++) { tempS++; tempW++; } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { tempS++; tempW++; } } } deltamax = tempS / Math.pow(tempW, m_Repulsion); for (int i = 0; i < clusters.size(); i++) { CLOPECluster tempcluster = clusters.get(i); delta = tempcluster.DeltaAdd(inst, m_Repulsion); // System.out.println("delta " + delta); if (delta > deltamax) { deltamax = delta; clustermax = i; } } if (clustermax == -1) { CLOPECluster newcluster = new CLOPECluster(); clusters.add(newcluster); newcluster.AddInstance(inst); return clusters.size() - 1; } clusters.get(clustermax).AddInstance(inst); return clustermax; }
Example #29
Source File: AnthOnlineClassifier.java From anthelion with Apache License 2.0 | 4 votes |
/** * Converts an {@link AnthURL} into an {@link Instance} which can be handled * by the {@link Classifier}. * * @param url * the {@link AnthURL} which should be transformed/converted. * @return the resulting {@link Instance}. */ private Instance convert(AnthURL url) { if (url != null) { try { Instance inst = new SparseInstance(dimension); inst.replaceMissingValues(replaceMissingValues); inst.setDataset(instances); inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem")); inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("domain"), url.uri.getHost()); Set<String> tokens = new HashSet<String>(); tokens.addAll(tokenizer(url.uri.getPath())); tokens.addAll(tokenizer(url.uri.getQuery())); tokens.addAll(tokenizer(url.uri.getFragment())); for (String tok : tokens) { inst.setValue(attributesIndex .get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1); } return inst; } catch (NullPointerException npe) { System.out .println("Could not convert AnthURL into Instance for classification of URL: " + (url != null ? (url.uri != null ? url.uri .toString() : "URI null") : "AnthURL null.")); return null; } } else { System.out .println("Input AnthURL for convertion into instance was null."); return null; } }
Example #30
Source File: ReduceDimensionFilter.java From anthelion with Apache License 2.0 | 4 votes |
/** * Returns the next instances based on the configuration of this class. */ public Instance nextInstance() { Instance inst = this.inputStream.nextInstance(); Instance newInst = new SparseInstance(hashSize + notHashableAttributes.size()); newInst.setDataset(newInstances); newInst.replaceMissingValues(replacementArray); if (newInstances.size() > 0) newInstances.remove(0); // newInstances.add(0, newInst); for (int i = 0; i < inst.numAttributes(); i++) { if (inst.classIndex() == i) { newInst.setValue( attributesIndex.get(inst.classAttribute().name()), inst.classValue()); } else { // check if attributes should be manipulated if (ignoreAttributes.contains(i)) { inst.setValue(i, 0); } if (makeBinaryAttributes.contains(i) && inst.value(i) > 0) { inst.setValue(i, 1); } // check what should be done with the attributes. if (notHashableAttributes.contains(i)) { newInst.setValue( attributesIndex.get(inst.attribute(i).name()), inst.value(i)); } else { // calculate the hash of the attribute name which is // included in // the vector and set it to 1 if (inst.value(i) > 0) { newInst.setValue(attributesIndex .get(getAttributeNameOfHash(getHash(inst .attribute(i).name(), hashSize))), 1); } } } } // System.out.println(newInst.toString()); return newInst; }