Java Code Examples for opennlp.tools.postag.POSTaggerME#tag()
The following examples show how to use opennlp.tools.postag.POSTaggerME#tag(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
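Before the examples, here is a minimal, self-contained sketch of the call pattern they all share: load a POSModel, wrap it in a POSTaggerME, and pass a token array to tag(), which returns one part-of-speech tag per token. The file path "en-pos-maxent.bin" and the class name PosTagSketch are placeholders for illustration, not taken from any of the projects below.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;

public class PosTagSketch {
    public static void main(String[] args) {
        // Placeholder path: point this at your own copy of the POS model.
        try (InputStream modelIn = new FileInputStream("en-pos-maxent.bin")) {
            POSModel model = new POSModel(modelIn);
            POSTaggerME tagger = new POSTaggerME(model);

            // tag() takes a token array and returns one POS tag per token.
            String[] tokens = WhitespaceTokenizer.INSTANCE
                    .tokenize("The quick brown fox jumps over the lazy dog .");
            String[] tags = tagger.tag(tokens);

            for (int i = 0; i < tokens.length; i++) {
                System.out.println(tokens[i] + "/" + tags[i]);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}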
Example 1
Source File: Chapter1.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
private static void detectingPartsOfSpeechExample() {
    String sentence = "POS processing is useful for enhancing the "
            + "quality of data sent to other elements of a pipeline.";

    POSModel model = new POSModelLoader()
            .load(new File("C:/Current Books/NLP and Java/Models/", "en-pos-maxent.bin"));
    POSTaggerME tagger = new POSTaggerME(model);

    String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    POSSample sample = new POSSample(tokens, tags);
    String posTokens[] = sample.getSentence();
    String posTags[] = sample.getTags();
    for (int i = 0; i < posTokens.length; i++) {
        System.out.print(posTokens[i] + " - " + posTags[i]);
    }
    System.out.println();

    for (int i = 0; i < tokens.length; i++) {
        System.out.print(tokens[i] + "[" + tags[i] + "] ");
    }
}
Example 2
Source File: OpenNLPAnnotator.java From Stargraph with MIT License | 6 votes |
@Override
public List<Word> doRun(Language language, String sentence) {
    Tokenizer tokenizer = new TokenizerME(getTokenizerModel(language));
    POSTaggerME tagger = new POSTaggerME(getPOSModel(language));

    String[] tokens = tokenizer.tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    PartOfSpeechSet posSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> words = new ArrayList<>();
    for (int i = 0; i < tokens.length; i++) {
        words.add(new Word(posSet.valueOf(tags[i]), tokens[i]));
    }
    return words;
}
Example 3
Source File: LemmetizerUnitTest.java From tutorials with MIT License | 6 votes |
@Test
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String tags[] = posTagger.tag(tokens);

    InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict");
    DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
    String[] lemmas = lemmatizer.lemmatize(tokens, tags);

    assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
}
Example 4
Source File: ChunkerUnitTest.java From tutorials with MIT License | 6 votes |
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String tags[] = posTagger.tag(tokens);

    InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    String[] chunks = chunker.chunk(tokens, tags);

    assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP",
            "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
}
Example 5
Source File: NLPExamples.java From Java-for-Data-Science with MIT License | 5 votes |
public void POSExample() {
    try (InputStream input = new FileInputStream(new File("en-pos-maxent.bin"));) {
        // To lower case example
        String lowerCaseVersion = sentence.toLowerCase();
        out.println(lowerCaseVersion);

        // Pull out tokens
        List<String> list = new ArrayList<>();
        Scanner scanner = new Scanner(sentence);
        while (scanner.hasNext()) {
            list.add(scanner.next());
        }

        // Convert list to an array
        String[] words = new String[1];
        words = list.toArray(words);

        // Build model
        POSModel posModel = new POSModel(input);
        POSTaggerME posTagger = new POSTaggerME(posModel);

        // Tag words
        String[] posTags = posTagger.tag(words);
        for (int i = 0; i < posTags.length; i++) {
            out.println(words[i] + " - " + posTags[i]);
        }

        // Find top sequences
        Sequence sequences[] = posTagger.topKSequences(words);
        for (Sequence sequence : sequences) {
            out.println(sequence);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
Example 6
Source File: BasicActions.java From knowledge-extraction with Apache License 2.0 | 5 votes |
public String[] testTagger() {
    String[] tags = {};
    try (InputStream modelIn = BasicActions.class.getClassLoader()
            .getResourceAsStream(Consts.EN_POS_MODEL);) {
        POSModel posModel = new POSModel(modelIn);
        POSTaggerME tagger = new POSTaggerME(posModel);
        tags = tagger.tag(testTokenizer());
        System.out.println(Arrays.toString(tags));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return tags;
}
Example 7
Source File: POSTaggerUnitTest.java From tutorials with MIT License | 5 votes |
@Test
public void givenPOSModel_whenPOSTagging_thenPOSAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String tags[] = posTagger.tag(tokens);

    assertThat(tags).contains("NNP", "VBZ", "DT", "NN", "VBN", "NNP", ".");
}
Example 8
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
private static void usingOpenNLPPOSModel() {
    System.out.println("OpenNLP POSModel Examples");
    try (InputStream modelIn = new FileInputStream(new File(getModelDir(), "en-pos-maxent.bin"));) {
        POSModel model = new POSModel(modelIn);
        POSTaggerME tagger = new POSTaggerME(model);

        // Introduction sentences
        // sentence = tokenizeSentence("The cow jumped over the moon.");
        // sentence = tokenizeSentence("Bill used the force to force the manger to tear the bill in two.");
        // sentence = tokenizeSentence("AFAIK she H8 cth!");
        // sentence = tokenizeSentence("BTW had a GR8 tym at the party BBIAM.");
        // sentence = tokenizeSentence("Whether \"Blue\" was correct or not (it’s not) is debatable");

        String tags[] = tagger.tag(sentence);
        double probs[] = tagger.probs();
        for (int i = 0; i < sentence.length; i++) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // Use import opennlp.tools.util.Sequence; instead of
        // import opennlp.model.Sequence
        System.out.println("topSequences");
        Sequence topSequences[] = tagger.topKSequences(sentence);
        for (int i = 0; i < topSequences.length; i++) {
            System.out.println(topSequences[i]);
            // List<String> list = topSequences[i].getOutcomes();
            // for (String outcome : list) {
            //     System.out.print(outcome + " ");
            //     System.out.println();
            // }
        }
        System.out.println();

        System.out.println("occurrences and probabilities");
        // DecimalFormat decimalFormat = new DecimalFormat("##.###");
        for (int i = 0; i < topSequences.length; i++) {
            List<String> outcomes = topSequences[i].getOutcomes();
            double probabilities[] = topSequences[i].getProbs();
            for (int j = 0; j < outcomes.size(); j++) {
                System.out.printf("%s/%5.3f ", outcomes.get(j), probabilities[j]);
            }
            System.out.println();
        }
        System.out.println();

        // Getting the dictionary tags
        // POSTaggerFactory ptf = model.getFactory();
        // TagDictionary tagDictionary = ptf.getTagDictionary();
        // String dictionaryTags[] = tagDictionary.getTags("the");
        // System.out.println(dictionaryTags.length);
        // for (String word : dictionaryTags) {
        //     System.out.println(word);
        // }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 9
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
private static void usingOpenNLPChunker() {
    try (InputStream posModelStream = new FileInputStream(getModelDir() + "\\en-pos-maxent.bin");
         InputStream chunkerStream = new FileInputStream(getModelDir() + "\\en-chunker.bin");) {
        POSModel model = new POSModel(posModelStream);
        POSTaggerME tagger = new POSTaggerME(model);

        // Used to create sample data for trainer
        // for (String sentence : sentences) {
        //     String sen[] = tokenizeSentence(sentence);
        //     String tags[] = tagger.tag(sen);
        //     for (int i = 0; i < tags.length; i++) {
        //         System.out.print(sen[i] + "/" + tags[i] + " ");
        //     }
        //     System.out.println();
        // }
        // System.out.println();

        String tags[] = tagger.tag(sentence);
        for (int i = 0; i < tags.length; i++) {
            // for (String token : sentence) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // chunker
        System.out.println("------------Chunker -----------");
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        ChunkerME chunkerME = new ChunkerME(chunkerModel);
        String result[] = chunkerME.chunk(sentence, tags);
        for (int i = 0; i < result.length; i++) {
            System.out.println("[" + sentence[i] + "] " + result[i]);
        }

        System.out.println("------------Chunker Spans -----------");
        Span[] spans = chunkerME.chunkAsSpans(sentence, tags);
        for (Span span : spans) {
            System.out.print("Type: " + span.getType() + " - " + " Begin: " + span.getStart()
                    + " End:" + span.getEnd() + " Length: " + span.length() + " [");
            for (int j = span.getStart(); j < span.getEnd(); j++) {
                System.out.print(sentence[j] + " ");
            }
            System.out.println("]");
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
Example 10
Source File: OpenNlpPosRecommender.java From inception with Apache License 2.0 | 4 votes |
@Override
public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter)
        throws RecommendationException {
    List<POSSample> data = extractPosSamples(aCasses);
    List<POSSample> trainingSet = new ArrayList<>();
    List<POSSample> testSet = new ArrayList<>();

    for (POSSample posSample : data) {
        switch (aDataSplitter.getTargetSet(posSample)) {
        case TRAIN:
            trainingSet.add(posSample);
            break;
        case TEST:
            testSet.add(posSample);
            break;
        default:
            // Do nothing
            break;
        }
    }

    int testSetSize = testSet.size();
    int trainingSetSize = trainingSet.size();
    double overallTrainingSize = data.size() - testSetSize;
    double trainRatio = (overallTrainingSize > 0) ? trainingSetSize / overallTrainingSize : 0.0;

    if (trainingSetSize < 2 || testSetSize < 2) {
        String info = String.format(
                "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]",
                trainingSetSize, testSetSize, data.size());
        LOG.info(info);

        EvaluationResult result = new EvaluationResult(trainingSetSize, testSetSize, trainRatio);
        result.setEvaluationSkipped(true);
        result.setErrorMsg(info);
        return result;
    }

    LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainingSet.size(),
            testSet.size(), data.size());

    // Train model
    POSModel model = train(trainingSet, traits.getParameters());
    if (model == null) {
        throw new RecommendationException("Model is null, cannot evaluate!");
    }

    POSTaggerME tagger = new POSTaggerME(model);

    // Evaluate
    List<LabelPair> labelPairs = new ArrayList<>();
    for (POSSample sample : testSet) {
        String[] predictedTags = tagger.tag(sample.getSentence());
        String[] goldTags = sample.getTags();
        for (int i = 0; i < predictedTags.length; i++) {
            labelPairs.add(new LabelPair(goldTags[i], predictedTags[i]));
        }
    }

    return labelPairs.stream()
            .collect(EvaluationResult.collector(trainingSetSize, testSetSize, trainRatio, PAD));
}