weka.filters.unsupervised.attribute.StringToWordVector Java Examples
The following examples show how to use
weka.filters.unsupervised.attribute.StringToWordVector.
You can vote up the examples you like or vote down the ones you don't like,
and you can visit the original project or source file by following the links above each example. Related API usage is shown on the sidebar.
Example #1
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**
 * Returns the text-based (word bigram) representation of the given data file.
 *
 * @param fileText path to a data file loadable by Weka's {@code DataSource}
 * @return the instances transformed into a bag-of-bigrams word vector
 * @throws Exception if the file cannot be loaded or the filter fails
 */
private Instances getText(String fileText) throws Exception {
    DataSource ds = new DataSource(fileText);
    Instances data = ds.getDataSet();
    data.setClassIndex(1);

    StringToWordVector filter = new StringToWordVector();
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(1);
    filter.setUseStoplist(false);
    filter.setTFTransform(false);
    filter.setIDFTransform(false);
    // Effectively "keep everything" — no vocabulary pruning.
    filter.setWordsToKeep(1000000000);

    // Tokenize into bigrams only (min = max = 2).
    NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(2);
    tokenizer.setNGramMaxSize(2);
    filter.setTokenizer(tokenizer);

    // setInputFormat must be called AFTER all options are configured:
    // Weka determines the filter's output format from the options in
    // effect at this call (the original called it first, so the options
    // above risked being ignored).
    filter.setInputFormat(data);
    return weka.filters.Filter.useFilter(data, filter);
}
Example #2
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**
 * Returns the feature-based (word unigram) representation of the given data file.
 *
 * @param fileFeature path to a data file loadable by Weka's {@code DataSource}
 * @return the instances transformed into a bag-of-unigrams word vector
 * @throws Exception if the file cannot be loaded or the filter fails
 */
private Instances getFeature(String fileFeature) throws Exception {
    DataSource ds = new DataSource(fileFeature);
    Instances data = ds.getDataSet();
    data.setClassIndex(1);

    StringToWordVector filter = new StringToWordVector();
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(1);
    filter.setUseStoplist(false);
    filter.setTFTransform(false);
    filter.setIDFTransform(false);
    // Effectively "keep everything" — no vocabulary pruning.
    filter.setWordsToKeep(1000000000);

    // Tokenize into unigrams only (min = max = 1).
    NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(1);
    tokenizer.setNGramMaxSize(1);
    filter.setTokenizer(tokenizer);

    // setInputFormat must be called AFTER all options are configured:
    // Weka determines the filter's output format from the options in
    // effect at this call (the original called it first, so the options
    // above risked being ignored).
    filter.setInputFormat(data);
    return weka.filters.Filter.useFilter(data, filter);
}
Example #3
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**
 * Returns the combined (text + POS) bigram representation of the given data file.
 *
 * @param fileComplex path to a data file loadable by Weka's {@code DataSource}
 * @return the instances transformed into a bag-of-bigrams word vector
 * @throws Exception if the file cannot be loaded or the filter fails
 */
private Instances getComplex(String fileComplex) throws Exception {
    DataSource ds = new DataSource(fileComplex);
    Instances data = ds.getDataSet();
    data.setClassIndex(1);

    StringToWordVector filter = new StringToWordVector();
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(1);
    filter.setUseStoplist(false);
    filter.setTFTransform(false);
    filter.setIDFTransform(false);
    // Effectively "keep everything" — no vocabulary pruning.
    filter.setWordsToKeep(1000000000);

    // Tokenize into bigrams only (min = max = 2).
    NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(2);
    tokenizer.setNGramMaxSize(2);
    filter.setTokenizer(tokenizer);

    // setInputFormat must be called AFTER all options are configured:
    // Weka determines the filter's output format from the options in
    // effect at this call (the original called it first, so the options
    // above risked being ignored).
    filter.setInputFormat(data);
    return weka.filters.Filter.useFilter(data, filter);
}
Example #4
Source File: SentimentAnalyser.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**
 * Initializes the {@code stwv} StringToWordVector filter: lower-cased
 * bigram tokens, no stop-list, raw term counts (no TF/IDF), unbounded
 * vocabulary, applied to the first attribute only.
 */
private void initializeFilter() {
    stwv = new StringToWordVector();

    // Bigram tokenizer (min = max = 2).
    NGramTokenizer bigrams = new NGramTokenizer();
    bigrams.setNGramMinSize(2);
    bigrams.setNGramMaxSize(2);
    stwv.setTokenizer(bigrams);

    stwv.setLowerCaseTokens(true);
    stwv.setMinTermFreq(1);
    stwv.setUseStoplist(false);
    stwv.setTFTransform(false);
    stwv.setIDFTransform(false);
    stwv.setWordsToKeep(1000000000); // effectively unbounded vocabulary
    stwv.setAttributeIndices("first");
}
Example #5
Source File: PolarityClassifier.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**
 * Initializes the {@code stwv} StringToWordVector filter used by the text
 * representations: lower-cased bigram tokens, no stop-list, raw term
 * counts (no TF/IDF), unbounded vocabulary.
 */
private void initialiseTextFilter() {
    stwv = new StringToWordVector();

    // Bigram tokenizer (min = max = 2).
    NGramTokenizer bigrams = new NGramTokenizer();
    bigrams.setNGramMinSize(2);
    bigrams.setNGramMaxSize(2);
    stwv.setTokenizer(bigrams);

    stwv.setLowerCaseTokens(true);
    stwv.setMinTermFreq(1);
    stwv.setUseStoplist(false);
    stwv.setTFTransform(false);
    stwv.setIDFTransform(false);
    stwv.setWordsToKeep(1000000000); // effectively unbounded vocabulary
}
Example #6
Source File: DecisionTreeEstimator.java From jMetal with MIT License | 4 votes |
public double doPrediction(int index,S testSolution) { double result = 0.0d; try { int numberOfObjectives = solutionList.get(0).getNumberOfObjectives(); //Attributes //numeric Attribute attr = new Attribute("my-numeric"); //nominal ArrayList<String> myNomVals = new ArrayList<>(); for (int i=0; i<numberOfObjectives; i++) myNomVals.add(VALUE_STRING+i); Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals); //System.out.println(attr1.isNominal()); //string Attribute attr2 = new Attribute(MY_STRING, (List<String>)null); //System.out.println(attr2.isString()); //2.create dataset ArrayList<Attribute> attrs = new ArrayList<>(); attrs.add(attr); attrs.add(attr1); attrs.add(attr2); Instances dataset = new Instances("my_dataset", attrs, 0); //Add instances for (S solution : solutionList) { //instaces for (int i = 0; i <numberOfObjectives ; i++) { double[] attValues = new double[dataset.numAttributes()]; attValues[0] = solution.getObjective(i); attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING+i); attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString()+i); dataset.add(new DenseInstance(1.0, attValues)); } } //DataSet test Instances datasetTest = new Instances("my_dataset_test", attrs, 0); //Add instances for (int i = 0; i < numberOfObjectives; i++) { Instance test = new DenseInstance(3); test.setValue(attr, testSolution.getObjective(i)); test.setValue(attr1, VALUE_STRING+i); test.setValue(attr2, testSolution.toString()+i); datasetTest.add(test); // dataset.add(test); } //split to 70:30 learn and test set //Preprocess strings (almost no classifier supports them) StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(dataset); dataset = Filter.useFilter(dataset, filter); //Buid classifier dataset.setClassIndex(1); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //resample if needed //dataset = dataset.resample(new Random(42)); dataset.setClassIndex(1); datasetTest.setClassIndex(1); 
//do eval Evaluation eval = new Evaluation(datasetTest); //trainset eval.evaluateModel(classifier, datasetTest); //testset result = classifier.classifyInstance(datasetTest.get(index)); } catch (Exception e) { result = testSolution.getObjective(index); } return result; }
Example #7
Source File: DecisionTreeEstimator.java From jMetal with MIT License | 4 votes |
public double doPredictionVariable(int index,S testSolution) { double result = 0.0d; try { int numberOfVariables = solutionList.get(0).getNumberOfVariables(); //Attributes //numeric Attribute attr = new Attribute("my-numeric"); //nominal ArrayList<String> myNomVals = new ArrayList<>(); for (int i=0; i<numberOfVariables; i++) myNomVals.add(VALUE_STRING+i); Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals); //string Attribute attr2 = new Attribute(MY_STRING, (List<String>)null); //2.create dataset ArrayList<Attribute> attrs = new ArrayList<>(); attrs.add(attr); attrs.add(attr1); attrs.add(attr2); Instances dataset = new Instances("my_dataset", attrs, 0); //Add instances for (S solution : solutionList) { //instaces for (int i = 0; i <numberOfVariables ; i++) { double[] attValues = new double[dataset.numAttributes()]; attValues[0] = ((DoubleSolution)solution).getVariable(i); attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING+i); attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString()+i); dataset.add(new DenseInstance(1.0, attValues)); } } //DataSet test Instances datasetTest = new Instances("my_dataset_test", attrs, 0); //Add instances for (int i = 0; i < numberOfVariables; i++) { Instance test = new DenseInstance(3); test.setValue(attr, ((DoubleSolution)testSolution).getVariable(i)); test.setValue(attr1, VALUE_STRING+i); test.setValue(attr2, testSolution.toString()+i); datasetTest.add(test); // dataset.add(test); } //split to 70:30 learn and test set //Preprocess strings (almost no classifier supports them) StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(dataset); dataset = Filter.useFilter(dataset, filter); //Buid classifier dataset.setClassIndex(1); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //resample if needed //dataset = dataset.resample(new Random(42)); dataset.setClassIndex(1); datasetTest.setClassIndex(1); //do eval Evaluation eval = new 
Evaluation(datasetTest); //trainset eval.evaluateModel(classifier, datasetTest); //testset result = classifier.classifyInstance(datasetTest.get(index)); } catch (Exception e) { result = ((DoubleSolution)testSolution).getVariable(index); } return result; }