Java Code Examples for opennlp.tools.postag.POSTaggerME#tag()
The following examples show how to use opennlp.tools.postag.POSTaggerME#tag(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
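Before the examples, here is a minimal, self-contained sketch of the call pattern they all share: load a POSModel, wrap it in a POSTaggerME, and pass a token array to tag(), which returns one part-of-speech tag per token. The file path "en-pos-maxent.bin" and the class name PosTagSketch are placeholders for illustration, not taken from any of the projects below.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;

public class PosTagSketch {
    public static void main(String[] args) {
        // Placeholder path: point this at your own copy of the POS model.
        try (InputStream modelIn = new FileInputStream("en-pos-maxent.bin")) {
            POSModel model = new POSModel(modelIn);
            POSTaggerME tagger = new POSTaggerME(model);

            // tag() takes a token array and returns one POS tag per token.
            String[] tokens = WhitespaceTokenizer.INSTANCE
                    .tokenize("The quick brown fox jumps over the lazy dog .");
            String[] tags = tagger.tag(tokens);

            for (int i = 0; i < tokens.length; i++) {
                System.out.println(tokens[i] + "/" + tags[i]);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}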
Example 1
Source File: Chapter1.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
private static void detectingPartsOfSpeechExample() {
    String sentence = "POS processing is useful for enhancing the "
            + "quality of data sent to other elements of a pipeline.";

    POSModel model = new POSModelLoader()
            .load(new File("C:/Current Books/NLP and Java/Models/", "en-pos-maxent.bin"));
    POSTaggerME tagger = new POSTaggerME(model);

    String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    POSSample sample = new POSSample(tokens, tags);
    String posTokens[] = sample.getSentence();
    String posTags[] = sample.getTags();
    for (int i = 0; i < posTokens.length; i++) {
        System.out.print(posTokens[i] + " - " + posTags[i]);
    }
    System.out.println();

    for (int i = 0; i < tokens.length; i++) {
        System.out.print(tokens[i] + "[" + tags[i] + "] ");
    }
}
Example 2
Source File: OpenNLPAnnotator.java From Stargraph with MIT License | 6 votes |
@Override
public List<Word> doRun(Language language, String sentence) {
    Tokenizer tokenizer = new TokenizerME(getTokenizerModel(language));
    POSTaggerME tagger = new POSTaggerME(getPOSModel(language));

    String[] tokens = tokenizer.tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    PartOfSpeechSet posSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> words = new ArrayList<>();
    for (int i = 0; i < tokens.length; i++) {
        words.add(new Word(posSet.valueOf(tags[i]), tokens[i]));
    }
    return words;
}
Example 3
Source File: LemmetizerUnitTest.java From tutorials with MIT License | 6 votes |
@Test
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String tags[] = posTagger.tag(tokens);

    InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict");
    DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
    String[] lemmas = lemmatizer.lemmatize(tokens, tags);

    assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
}
Example 4
Source File: ChunkerUnitTest.java From tutorials with MIT License | 6 votes |
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String tags[] = posTagger.tag(tokens);

    InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    String[] chunks = chunker.chunk(tokens, tags);

    assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP",
            "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
}
Example 5
Source File: NLPExamples.java From Java-for-Data-Science with MIT License | 5 votes |
public void POSExample() {
    try (InputStream input = new FileInputStream(new File("en-pos-maxent.bin"));) {
        // To lower case example
        String lowerCaseVersion = sentence.toLowerCase();
        out.println(lowerCaseVersion);

        // Pull out tokens
        List<String> list = new ArrayList<>();
        Scanner scanner = new Scanner(sentence);
        while (scanner.hasNext()) {
            list.add(scanner.next());
        }

        // Convert list to an array
        String[] words = new String[1];
        words = list.toArray(words);

        // Build model
        POSModel posModel = new POSModel(input);
        POSTaggerME posTagger = new POSTaggerME(posModel);

        // Tag words
        String[] posTags = posTagger.tag(words);
        for (int i = 0; i < posTags.length; i++) {
            out.println(words[i] + " - " + posTags[i]);
        }

        // Find top sequences
        Sequence sequences[] = posTagger.topKSequences(words);
        for (Sequence sequence : sequences) {
            out.println(sequence);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
Example 6
Source File: BasicActions.java From knowledge-extraction with Apache License 2.0 | 5 votes |
public String[] testTagger() {
    String[] tags = {};
    try (InputStream modelIn = BasicActions.class.getClassLoader()
            .getResourceAsStream(Consts.EN_POS_MODEL);) {
        POSModel posModel = new POSModel(modelIn);
        POSTaggerME tagger = new POSTaggerME(posModel);
        tags = tagger.tag(testTokenizer());
        System.out.println(Arrays.toString(tags));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return tags;
}
Example 7
Source File: POSTaggerUnitTest.java From tutorials with MIT License | 5 votes |
@Test
public void givenPOSModel_whenPOSTagging_thenPOSAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String tags[] = posTagger.tag(tokens);

    assertThat(tags).contains("NNP", "VBZ", "DT", "NN", "VBN", "NNP", ".");
}
Example 8
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
private static void usingOpenNLPPOSModel() {
    System.out.println("OpenNLP POSModel Examples");
    try (InputStream modelIn = new FileInputStream(new File(getModelDir(), "en-pos-maxent.bin"));) {
        POSModel model = new POSModel(modelIn);
        POSTaggerME tagger = new POSTaggerME(model);

        // Introduction sentences
        // sentence = tokenizeSentence("The cow jumped over the moon.");
        // sentence = tokenizeSentence("Bill used the force to force the manger to tear the bill in two.");
        // sentence = tokenizeSentence("AFAIK she H8 cth!");
        // sentence = tokenizeSentence("BTW had a GR8 tym at the party BBIAM.");
        // sentence = tokenizeSentence("Whether \"Blue\" was correct or not (it’s not) is debatable");

        String tags[] = tagger.tag(sentence);
        double probs[] = tagger.probs();
        for (int i = 0; i < sentence.length; i++) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // Use import opennlp.tools.util.Sequence; instead of
        // import opennlp.model.Sequence
        System.out.println("topSequences");
        Sequence topSequences[] = tagger.topKSequences(sentence);
        for (int i = 0; i < topSequences.length; i++) {
            System.out.println(topSequences[i]);
            // List<String> list = topSequences[i].getOutcomes();
            // for (String outcome : list) {
            //     System.out.print(outcome + " ");
            //     System.out.println();
            // }
        }
        System.out.println();

        System.out.println("occurrences and probabilities");
        // DecimalFormat decimalFormat = new DecimalFormat("##.###");
        for (int i = 0; i < topSequences.length; i++) {
            List<String> outcomes = topSequences[i].getOutcomes();
            double probabilities[] = topSequences[i].getProbs();
            for (int j = 0; j < outcomes.size(); j++) {
                System.out.printf("%s/%5.3f ", outcomes.get(j), probabilities[j]);
            }
            System.out.println();
        }
        System.out.println();

        // Getting the dictionary tags
        // POSTaggerFactory ptf = model.getFactory();
        // TagDictionary tagDictionary = ptf.getTagDictionary();
        // String dictionaryTags[] = tagDictionary.getTags("the");
        // System.out.println(dictionaryTags.length);
        // for (String word : dictionaryTags) {
        //     System.out.println(word);
        // }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 9
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
private static void usingOpenNLPChunker() {
    try (InputStream posModelStream = new FileInputStream(getModelDir() + "\\en-pos-maxent.bin");
         InputStream chunkerStream = new FileInputStream(getModelDir() + "\\en-chunker.bin");) {
        POSModel model = new POSModel(posModelStream);
        POSTaggerME tagger = new POSTaggerME(model);

        // Used to create sample data for trainer
        // for (String sentence : sentences) {
        //     String sen[] = tokenizeSentence(sentence);
        //     String tags[] = tagger.tag(sen);
        //     for (int i = 0; i < tags.length; i++) {
        //         System.out.print(sen[i] + "/" + tags[i] + " ");
        //     }
        //     System.out.println();
        // }
        // System.out.println();

        String tags[] = tagger.tag(sentence);
        for (int i = 0; i < tags.length; i++) {
            // for (String token : sentence) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // chunker
        System.out.println("------------Chunker -----------");
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        ChunkerME chunkerME = new ChunkerME(chunkerModel);
        String result[] = chunkerME.chunk(sentence, tags);
        for (int i = 0; i < result.length; i++) {
            System.out.println("[" + sentence[i] + "] " + result[i]);
        }

        System.out.println("------------Chunker Spans -----------");
        Span[] spans = chunkerME.chunkAsSpans(sentence, tags);
        for (Span span : spans) {
            System.out.print("Type: " + span.getType() + " - " + " Begin: " + span.getStart()
                    + " End:" + span.getEnd() + " Length: " + span.length() + " [");
            for (int j = span.getStart(); j < span.getEnd(); j++) {
                System.out.print(sentence[j] + " ");
            }
            System.out.println("]");
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
Example 10
Source File: OpenNlpPosRecommender.java From inception with Apache License 2.0 | 4 votes |
@Override
public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter)
        throws RecommendationException {
    List<POSSample> data = extractPosSamples(aCasses);
    List<POSSample> trainingSet = new ArrayList<>();
    List<POSSample> testSet = new ArrayList<>();

    for (POSSample posSample : data) {
        switch (aDataSplitter.getTargetSet(posSample)) {
        case TRAIN:
            trainingSet.add(posSample);
            break;
        case TEST:
            testSet.add(posSample);
            break;
        default:
            // Do nothing
            break;
        }
    }

    int testSetSize = testSet.size();
    int trainingSetSize = trainingSet.size();
    double overallTrainingSize = data.size() - testSetSize;
    double trainRatio = (overallTrainingSize > 0) ? trainingSetSize / overallTrainingSize : 0.0;

    if (trainingSetSize < 2 || testSetSize < 2) {
        String info = String.format(
                "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]",
                trainingSetSize, testSetSize, data.size());
        LOG.info(info);

        EvaluationResult result = new EvaluationResult(trainingSetSize, testSetSize, trainRatio);
        result.setEvaluationSkipped(true);
        result.setErrorMsg(info);
        return result;
    }

    LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainingSet.size(),
            testSet.size(), data.size());

    // Train model
    POSModel model = train(trainingSet, traits.getParameters());
    if (model == null) {
        throw new RecommendationException("Model is null, cannot evaluate!");
    }

    POSTaggerME tagger = new POSTaggerME(model);

    // Evaluate
    List<LabelPair> labelPairs = new ArrayList<>();
    for (POSSample sample : testSet) {
        String[] predictedTags = tagger.tag(sample.getSentence());
        String[] goldTags = sample.getTags();
        for (int i = 0; i < predictedTags.length; i++) {
            labelPairs.add(new LabelPair(goldTags[i], predictedTags[i]));
        }
    }

    return labelPairs.stream()
            .collect(EvaluationResult.collector(trainingSetSize, testSetSize, trainRatio, PAD));
}