Java Code Examples for edu.stanford.nlp.tagger.maxent.MaxentTagger#tokenizeText()

The following examples show how to use edu.stanford.nlp.tagger.maxent.MaxentTagger#tokenizeText(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.

Example 1

Source File: CoreNLPAnnotator.java From Stargraph with MIT License

6 votes

/**
 * Tokenizes the given text into sentences, POS-tags every sentence and
 * flattens the tagged tokens into a single list of {@link Word}s.
 *
 * @param language language of the text; only {@code EN} has a tagger model here
 * @param sentence raw text to tokenize and tag
 * @return one {@link Word} per tagged token, in tagging order
 * @throws UnsupportedLanguageException if no tagger can be created for {@code language}
 */
@Override
protected List<Word> doRun(Language language, String sentence) {
    // Look up the tagger registered for this language, creating it on first use.
    MaxentTagger posTagger = taggers.computeIfAbsent(language, requested -> {
        if (requested != EN) {
            throw new UnsupportedLanguageException(requested);
        }
        return new MaxentTagger("edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
    });

    PartOfSpeechSet tagSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> result = new ArrayList<>();

    // tokenizeText splits the input into sentences, each a list of tokens.
    for (List<HasWord> tokens : MaxentTagger.tokenizeText(new StringReader(sentence))) {
        posTagger.tagSentence(tokens).forEach(tagged ->
                result.add(new Word(tagSet.valueOf(tagged.tag()), tagged.value())));
    }

    return result;
}

Example 2

Source File: CorenlpPipeline.java From datashare with GNU Affero General Public License v3.0

6 votes

/**
 * Part-of-Speech classification (maximum entropy) only.
 *
 * <p>Splits the input into sentences, tags each sentence with the POS model
 * obtained for the given language, and records one {@code POS} annotation per
 * tagged token using the token's begin/end positions. The tag value itself is
 * not stored in the annotations.
 *
 * @param input    the string to annotate
 * @param hash     the input hash code
 * @param language the input language
 * @return the annotations holding one POS span per token
 * @throws InterruptedException declared by the signature; presumably propagated
 *                              from the model lookup -- confirm against CoreNlpPosModels
 */
private Annotations processPosClassifier(String input, String hash, Language language) throws InterruptedException {
    Annotations annotations = new Annotations(hash, getType(), language);
    LOGGER.info("POS-tagging for " + language.toString());

    // Fetch the POS annotator (wrapping a MaxentTagger) for this language
    final CoreNlpAnnotator<MaxentTagger> nlpAnnotator;
    nlpAnnotator = CoreNlpPosModels.getInstance().get(language);

    // Split input into sentences
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(input));
    for (List<HasWord> sentence : sentences) {
        // Tag the sentence with parts-of-speech
        List<TaggedWord> taggedSentence = nlpAnnotator.annotator.tagSentence(sentence);
        // Feed annotations: only the token span is recorded, the tag is not used
        for (TaggedWord word : taggedSentence) {
            int begin = word.beginPosition();
            int end = word.endPosition();
            annotations.add(POS, begin, end);
        }
    }
    return annotations;
}

Example 3

Source File: StanfordPosTagger.java From OpenEphyra with GNU General Public License v2.0

6 votes

/**
 * Breaks the given text into tokens using the Stanford tokenizer and
 * returns the surface form of every token, across all detected sentences,
 * in order.
 *
 * @param sentence Input sentence
 * @return Array of tokens
 */
public static String[] tokenize(String sentence) {
	List sentences = MaxentTagger.tokenizeText(new StringReader(sentence));
	List<String> words = new ArrayList<String>();
	
	for (Object element : sentences) {
		// each element returned by the tokenizer is a Sentence of HasWord items
		Sentence current = (Sentence) element;
		int count = current.length();
		
		for (int k = 0; k < count; k++) {
			HasWord token = current.getHasWord(k);
			words.add(token.word());
		}
	}
	
	return words.toArray(new String[words.size()]);
}

Example 4

Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

5 votes

/**
 * Demonstrates the Stanford maximum-entropy POS tagger: loads a tagger model,
 * reads {@code sentences.txt}, tokenizes it into sentences and prints each
 * tagged sentence to standard output.
 *
 * <p>I/O failures (including a missing input file) are reported by printing
 * the stack trace; they are not rethrown.
 */
private static void usingStanfordMaxentPOS() {
    try {
        MaxentTagger tagger = new MaxentTagger(getModelDir() + "//wsj-0-18-bidirectional-distsim.tagger");
//        MaxentTagger tagger = new MaxentTagger(getModelDir() + "//gate-EN-twitter.model");
//        System.out.println(tagger.tagString("AFAIK she H8 cth!"));
//        System.out.println(tagger.tagString("BTW had a GR8 tym at the party BBIAM."));

        // try-with-resources guarantees the file handle is released, even if tagging fails
        try (BufferedReader reader = new BufferedReader(new FileReader("sentences.txt"))) {
            List<List<HasWord>> sentences = MaxentTagger.tokenizeText(reader);
            for (List<HasWord> sentence : sentences) {
                List<TaggedWord> taggedSentence = tagger.tagSentence(sentence);
                // Simple display
                System.out.println("---" + taggedSentence);
                // Simple conversion to String
//                System.out.println(Sentence.listToString(taggedSentence, false));
                // Display of words and tags
//                for (TaggedWord taggedWord : taggedSentence) {
//                    System.out.print(taggedWord.word() + "/" + taggedWord.tag() + " ");
//                }
//                System.out.println();
                // List of specific tags
//                System.out.print("NN Tagged: ");
//                for (TaggedWord taggedWord : taggedSentence) {
//                    if (taggedWord.tag().startsWith("NN")) {
//                        System.out.print(taggedWord.word() + " ");
//                    }
//                }
//                System.out.println();
            }
        }
    } catch (java.io.IOException ex) {
        // Covers FileNotFoundException (a subclass) as well as a failure while closing
        // the reader; fully qualified so no additional import is required.
        ex.printStackTrace();
    }
}

Example 5

Source File: StanfordPOSTagger.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Tokenizes the input text into sentences and POS-tags each one.
 *
 * @param input raw text to tag
 * @return one list of tagged words per sentence, in the order the
 *         tokenizer produced the sentences
 */
public Vector<ArrayList<TaggedWord>> tag(String input) {
    Vector<ArrayList<TaggedWord>> taggedSentences = new Vector<ArrayList<TaggedWord>>();

    BufferedReader reader = new BufferedReader(new StringReader(input));
    List<List<HasWord>> tokenized = MaxentTagger.tokenizeText(reader);

    for (List<? extends HasWord> tokens : tokenized) {
        taggedSentences.add(tagger.tagSentence(tokens));
    }
    return taggedSentences;
}

Example 6

Source File: TaggerDemo.java From blog-codes with Apache License 2.0

4 votes

/**
 * Loads the default tagger model from the classpath, tags a sample sentence
 * and prints the tagged result to standard output.
 *
 * @param args unused
 * @throws Exception if the model resource is missing or cannot be read
 */
public static void main(String[] args) throws Exception {
	String modelResource = "/" + MaxentTagger.DEFAULT_JAR_PATH;

	MaxentTagger tagger;
	// Close the model stream as soon as the tagger has been built from it
	try (InputStream input = TaggerDemo.class.getResourceAsStream(modelResource)) {
		if (input == null) {
			// getResourceAsStream returns null when the resource is not on the classpath
			throw new IllegalStateException("Tagger model not found on classpath: " + modelResource);
		}
		tagger = new MaxentTagger(input);
	}

	List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader("Karma of humans is AI"));

	for (List<HasWord> sentence : sentences) {
		List<TaggedWord> tSentence = tagger.tagSentence(sentence);
		System.out.println(SentenceUtils.listToString(tSentence, false));
	}
}