edu.stanford.nlp.ling.CoreLabel#get

Source File: Postprocess.java From phrases with Apache License 2.0

6 votes

/**
 * Runs a CoreNLP pipeline (tokenize, ssplit, pos, lemma, parse, sentiment) over the
 * sentences of every pattern, reading the predicted sentiment class of each sentence
 * and the lemma of each token.
 *
 * <p>The values read are not stored anywhere yet and the method currently always
 * returns {@code null}.
 *
 * @param patterns the patterns whose sentences are annotated
 * @return always {@code null}
 */
public List<Pattern> run(List<Pattern> patterns) {

        Properties pipelineConfig = new Properties();
        pipelineConfig.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
        StanfordCoreNLP nlp = new StanfordCoreNLP(pipelineConfig);

        for (Pattern candidate : patterns) {
            List<CoreMap> annotatedSentences = nlp.process(candidate.toSentences())
                    .get(CoreAnnotations.SentencesAnnotation.class);
            for (CoreMap annotatedSentence : annotatedSentences) {
                    Tree sentimentTree = annotatedSentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
                    int sentimentClass = RNNCoreAnnotations.getPredictedClass(sentimentTree);
                    for (CoreLabel word : annotatedSentence.get(CoreAnnotations.TokensAnnotation.class)) {
                        String wordLemma = word.get(CoreAnnotations.LemmaAnnotation.class);

                    }
            }
        }
        return null;
    }

Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Loads the CRF classifier found under {@code getModelDir()}, classifies the
 * concatenation of all entries in {@code sentences} and prints every token whose
 * answer label is not {@code "O"} as {@code word:category}.
 */
private static void usingStanfordNER() {
        String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz";
        CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model);

        // Build the input with a StringBuilder; repeated String += copies the whole
        // accumulated text on every iteration.
        StringBuilder text = new StringBuilder();
        for (String element : sentences) {
            text.append(element);
        }

        List<List<CoreLabel>> entityList = classifier.classify(text.toString());

        for (List<CoreLabel> internalList : entityList) {
            for (CoreLabel coreLabel : internalList) {
                String word = coreLabel.word();
                String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class);
                // "O" is the label compared against to filter tokens out of the report.
                if (!"O".equals(category)) {
                    System.out.println(word + ":" + category);
                }
            }
        }
    }

Source File: ItalianReadability.java From tint with GNU General Public License v3.0

6 votes

/**
 * Registers a {@link DescriptionForm} for a glossary form found in the annotation.
 *
 * <p>Nothing is added when {@code start} has no entry in {@code indexes} or when
 * {@code form} has no entry in {@code glossario}.
 *
 * @param form           key looked up in {@code glossario}
 * @param indexes        map from {@code start} to an index into the annotation's token list
 * @param start          key looked up in {@code indexes}
 * @param numberOfTokens number of tokens the form spans, counted from the first token
 * @param forms          output map, keyed by the begin character offset of the first token
 * @param annotation     annotation providing the token list
 * @param glossario      glossary entries keyed by form
 */
static public void addDescriptionForm(String form, HashMap<Integer, Integer> indexes, int start,
        int numberOfTokens, TreeMap<Integer, DescriptionForm> forms, Annotation annotation,
        HashMap<String, GlossarioEntry> glossario) {
    Integer firstIndex = indexes.get(start);
    if (firstIndex == null) {
        return;
    }

    // Offsets are resolved before the glossary lookup, exactly as before, so an
    // out-of-range token index still fails here.
    Integer spanBegin = annotation.get(CoreAnnotations.TokensAnnotation.class)
            .get(firstIndex)
            .get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    Integer spanEnd = annotation.get(CoreAnnotations.TokensAnnotation.class)
            .get(firstIndex + numberOfTokens - 1)
            .get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

    GlossarioEntry entry = glossario.get(form);
    if (entry != null) {
        forms.put(spanBegin, new DescriptionForm(spanBegin, spanEnd, entry));
    }
}

Source File: ItalianReadability.java From tint with GNU General Public License v3.0

6 votes

/**
 * Assigns a difficulty level to a content word and updates the per-level counters.
 *
 * <p>The token starts at level 4. Levels 3, 2 and 1 are then checked in that order
 * and each check is independent: every matching level increments its own counter,
 * and the last matching (lowest) level is the one left on the token.
 *
 * @param token the content word being added
 */
@Override public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);
    HashMap<Integer, HashMultimap<String, String>> wordsByLevel = model.getEasyWords();
    String genericPos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    String tokenLemma = token.get(CoreAnnotations.LemmaAnnotation.class);

    // Default level; overwritten below by any easier level that lists the lemma.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    boolean inLevel3 = wordsByLevel.get(3).get(genericPos).contains(tokenLemma);
    if (inLevel3) {
        level3WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
    }

    boolean inLevel2 = wordsByLevel.get(2).get(genericPos).contains(tokenLemma);
    if (inLevel2) {
        level2WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
    }

    boolean inLevel1 = wordsByLevel.get(1).get(genericPos).contains(tokenLemma);
    if (inLevel1) {
        level1WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
    }
}

Source File: NerWithDepartmentTest.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Annotates {@code doc} with the shared {@code pipeline} and collects the named-entity
 * tag of every token, in document order.
 *
 * <p>Each token is also printed to standard output as {@code word<TAB>tag}.
 *
 * @param doc the raw text to annotate
 * @return one named-entity tag per token
 */
public static List<String> extractNER(String doc){
    Annotation annotated = new Annotation(doc);
    pipeline.annotate(annotated);

    List<CoreMap> allSentences = annotated.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> tags = new ArrayList<>();
    for (CoreMap current : allSentences) {
        for (CoreLabel tok : current.get(CoreAnnotations.TokensAnnotation.class)) {
            // Surface text of the token and its NER label.
            String surface = tok.get(CoreAnnotations.TextAnnotation.class);
            String tag = tok.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            tags.add(tag);
            System.out.println(surface + "\t" + tag);
        }
    }
    return tags;
}

Source File: CoreNlpToken.java From jstarcraft-nlp with Apache License 2.0

5 votes

/**
 * Advances the underlying iterator and loads the next label's text, part-of-speech
 * tag and begin/end positions into this object's fields.
 *
 * @return this same instance, now describing the next token
 */
@Override
public CoreNlpToken next() {
    CoreLabel current = iterator.next();
    this.text = current.get(CoreAnnotations.TextAnnotation.class);
    this.nature = current.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    this.begin = current.beginPosition();
    this.end = current.endPosition();
    return this;
}

Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0

5 votes

/**
 * Verifies that each token's stopword annotation agrees with the standard Lucene
 * English stopword set, and that the lemma flag is never set.
 * @throws Exception
 */
@org.junit.Test
public void testLuceneStopwordList() throws Exception {
    Properties config = new Properties();
    config.put("annotators", "tokenize, ssplit, stopword");
    config.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(config);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);

    // Reference set: the standard Lucene stopwords.
    Set<?> luceneStopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    for (CoreLabel token : tokens) {
        // first() = word flag, second() = lemma flag
        Pair<Boolean, Boolean> flags = token.get(StopwordAnnotator.class);

        boolean expectedStopword = luceneStopWords.contains(token.word().toLowerCase());
        if (expectedStopword) {
            assertTrue(flags.first());
        } else {
            assertFalse(flags.first());
        }

        // Lemma checking is not enabled, so the lemma flag must stay false.
        assertFalse(flags.second());
    }
}

Source File: Chapter8.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

5 votes

/**
 * Builds a pipeline (tokenize, ssplit, pos, lemma, ner, parse, dcoref) with a custom
 * NER model, annotates four sample sentences in a single {@code annotate(Iterable)}
 * call, prints the pipeline's timing information, then prints the word and POS tag of
 * every token in the second sample.
 */
private static void usingStanfordPipelineParallel() {
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    String path = "C:\\Current Books\\NLP and Java\\Downloads\\stanford-ner-2014-10-26\\classifiers";
    props.put("ner.model", path + "/english.muc.7class.distsim.crf.ser.gz");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation1 = new Annotation("The robber took the cash and ran.");
    Annotation annotation2 = new Annotation("The policeman chased him down the street.");
    Annotation annotation3 = new Annotation("A passerby, watching the action, tripped the thief as he passed by.");
    Annotation annotation4 = new Annotation("They all lived happily everafter, except for the thief of course.");
    // Parameterized (diamond) instead of the raw ArrayList, which caused an unchecked warning.
    List<Annotation> list = new ArrayList<>();
    list.add(annotation1);
    list.add(annotation2);
    list.add(annotation3);
    list.add(annotation4);
    // Typed as Iterable so the batch overload of annotate(...) is selected.
    Iterable<Annotation> iterable = list;

    pipeline.annotate(iterable);

    System.out.println("Total time: " + pipeline.timingInformation());
    List<CoreMap> sentences = annotation2.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.println("Word: " + word + " POS Tag: " + pos);
        }
    }
}

Source File: CoreNlpTokenizer.java From jstarcraft-nlp with Apache License 2.0

5 votes

/**
 * Emits the next CoreNLP token into the tokenizer's attributes.
 *
 * <p>The term is the token's lemma, or its text when no lemma is present. The type is
 * the NER tag unless it is missing or {@code "O"}, in which case the POS tag is used,
 * falling back to {@link TypeAttribute#DEFAULT_TYPE}.
 *
 * @return {@code false} when {@code getNextSentence()} reports no further sentence,
 *         {@code true} after a token has been emitted
 */
@Override
public boolean incrementToken() {
    clearAttributes();
    // Pull sentences until one yields a token or the input is exhausted.
    while (tokens == null || !tokens.hasNext()) {
        if (!getNextSentence()) {
            return false;
        }
    }
    CoreLabel current = tokens.next();

    // Term: prefer the lemma, fall back to the surface text.
    String term = current.get(LemmaAnnotation.class);
    if (term == null) {
        term = current.get(TextAnnotation.class);
    }
    termAttribute.setLength(0);
    termAttribute.append(term);

    // Type: NER tag when informative, otherwise the POS tag.
    String type = current.get(NamedEntityTagAnnotation.class);
    if (type == null || "O".equals(type)) {
        type = current.get(PartOfSpeechAnnotation.class);
    }
    if (type != null) {
        typeAttribute.setType(type);
    } else {
        typeAttribute.setType(TypeAttribute.DEFAULT_TYPE);
    }

    // Character offsets of the token.
    int startOffset = current.get(CharacterOffsetBeginAnnotation.class);
    int endOffset = current.get(CharacterOffsetEndAnnotation.class);
    offsetAttribute.setOffset(startOffset, endOffset);

    // Position increment accounts for tokens skipped since the last emitted one.
    positionAttribute.setPositionIncrement(1 + skippedTokens);
    skippedTokens = 0;
    return true;
}

Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * POS-tags a sentence via {@code getPOS} and converts every token into a {@link Word}.
 *
 * <p>Each {@code Word} is built from the base form of the lower-cased token text
 * (via {@code getBaseFormOfPattern}), the original text, the POS tag and a 1-based position.
 *
 * @param sentence the sentence to tag
 * @return one {@code Word} per token, in token order
 */
public Word[] getTaggedWords (String sentence) {
	CoreMap taggedSentence = getPOS(sentence);
	Word[] tagged = new Word[taggedSentence.get(TokensAnnotation.class).size()];
	int position = 0;
	for (CoreLabel tok : taggedSentence.get(TokensAnnotation.class)) {
		String surface = tok.get(TextAnnotation.class);
		String posTag = tok.get(PartOfSpeechAnnotation.class);
		String baseForm = getBaseFormOfPattern(surface.toLowerCase());
		// Word positions are 1-based.
		tagged[position] = new Word(baseForm, surface, posTag, position + 1);
		position++;
	}
	return tagged;
}

Source File: CoreNlpExample.java From core-nlp-example with MIT License

5 votes

/**
 * Annotates a fixed sample question with a pipeline (tokenize, ssplit, pos, lemma,
 * ner, parse, dcoref) and prints the word, POS tag and NER label of every token.
 *
 * @param args unused
 */
public static void main(String[] args) {

        // Pipeline with POS tagging, lemmatization, NER, parsing and coreference resolution.
        Properties pipelineProps = new Properties();
        pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP nlp = new StanfordCoreNLP(pipelineProps);

        // Wrap the sample text in an Annotation and run all annotators on it.
        String sample = "What is the Weather in Bangalore right now?";
        Annotation doc = new Annotation(sample);
        nlp.annotate(doc);

        List<CoreMap> allSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap current : allSentences) {
            for (CoreLabel tok : current.get(CoreAnnotations.TokensAnnotation.class)) {
                String surface = tok.get(CoreAnnotations.TextAnnotation.class);
                String posTag = tok.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String nerTag = tok.get(CoreAnnotations.NamedEntityTagAnnotation.class);

                String line = String.format("Print: word: [%s] pos: [%s] ne: [%s]", surface, posTag, nerTag);
                System.out.println(line);
            }
        }
    }

Source File: DigiCompMorphAnnotator.java From tint with GNU General Public License v3.0

5 votes

/**
 * For every token of every sentence, keeps the morphological analyses that start with
 * {@code lemma + "+"} or {@code lemma + "~"} and stores them as the token's
 * {@code MorphoCompAnnotation}.
 *
 * <p>When the space-separated morpho annotation has more than one part the (possibly
 * empty) list of matches is always stored; when it has exactly one part the annotation
 * is stored only if that part matches. Tokens without a morpho annotation, or whose
 * annotation contains only spaces, are skipped.
 *
 * @param annotation the document annotation; ignored if it has no sentences
 */
@Override
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel c : tokens) {
            String morpho = c.get(DigiMorphAnnotations.MorphoAnnotation.class);
            if (morpho == null) {
                // No morphological analysis on this token: nothing to filter.
                continue;
            }
            String[] morphFeatures = morpho.split(" ");
            if (morphFeatures.length == 0) {
                // split() drops trailing empty strings, so a spaces-only value yields
                // an empty array; indexing [0] on it used to throw.
                continue;
            }

            String lemma = c.get(CoreAnnotations.LemmaAnnotation.class);
            List<String> comps = new ArrayList<>();
            for (String m : morphFeatures) {
                if (m.startsWith(lemma + "+") || m.startsWith(lemma + "~")) {
                    comps.add(m);
                }
            }

            // Multi-part analyses always get a list (even an empty one); a single-part
            // analysis is annotated only when it matches.
            if (morphFeatures.length > 1 || !comps.isEmpty()) {
                c.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
            }
        }
    }
}

Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0

5 votes

/**
 * Verifies that each token's stopword annotation agrees with the custom stopword
 * list passed to the annotator, and that the lemma flag is never set.
 * @throws Exception
 */
@org.junit.Test
public void testCustomStopwordList() throws Exception {

    // Pipeline properties; the custom stopword list is passed through STOPWORDS_LIST.
    Properties config = new Properties();
    config.put("annotators", "tokenize, ssplit, stopword");
    config.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");
    config.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);

    // Reference set built from the same custom list.
    Set<?> customStopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(config);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
        // first() = word flag, second() = lemma flag
        Pair<Boolean, Boolean> flags = token.get(StopwordAnnotator.class);

        boolean expectedStopword = customStopWords.contains(token.word().toLowerCase());
        if (expectedStopword) {
            assertTrue(flags.first());
        } else {
            assertFalse(flags.first());
        }

        // Lemma checking is not enabled, so the lemma flag must stay false.
        assertFalse(flags.second());
    }
}

Source File: ReplaceSubordinateRule.java From tint with GNU General Public License v3.0

4 votes

/**
 * Experimental rule: inspects the first sentence, finds the second node of the
 * dependency history of token 1 and, if it is a verb, builds an indicative-mood
 * morphology query from its subjunctive ("cong") analyses and prints the result of the
 * inverse morphology lookup together with intermediate values.
 *
 * <p>The method only prints diagnostics; it always returns {@code null}.
 *
 * @param annotation annotated document; only its first sentence is used
 * @param children   not used by the current implementation
 * @return always {@code null}
 */
@Override public String apply(Annotation annotation, Map<Integer, HashMultimap<Integer, Integer>> children) {

        InverseDigiMorph dm = new InverseDigiMorph();

        int conj = 0;
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        CoreMap sentence = sentences.get(0);

        SemanticGraph semanticGraph = sentence
                .get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);

        // Graph node indexes are 1-based, token list indexes are 0-based.
        IndexedWord node = semanticGraph.getNodeByIndex(conj + 1);
        List<IndexedWord> history = getHistory(semanticGraph, node);
        if (history.size() == 1) {
            return null;
        }
        IndexedWord verb = history.get(1);
        CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(verb.index() - 1);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        if (!pos.startsWith("V")) {
            return null;
        }

        // todo: check subject in parse tree
        // todo: check clitics

        // Collect tense and person+number of every subjunctive verb analysis.
        // parts[0] is skipped; each remaining part is split on '+'.
        String morpho = token.get(DigiMorphAnnotations.MorphoAnnotation.class);
        String[] parts = morpho.split("\\s+");
        TreeSet<String> persons = new TreeSet<>();
        String tempo = null;
        for (int i = 1; i < parts.length; i++) {
            String[] vParts = parts[i].split("\\+");
            if (!vParts[1].equals("v")) {
                continue;
            }

            String modo = vParts[2];
            if (!modo.equals("cong")) {
                continue;
            }

            tempo = vParts[3];
            persons.add(vParts[5] + "+" + vParts[6]);
        }

        // Ambiguous (or missing) person: fall back to the next verb up the history.
        IndexedWord next = null;
        if (persons.size() != 1) {
            for (int i = 2; i < history.size(); i++) {
                if (history.get(i).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("V")) {
                    next = history.get(i);
                    break;
                }
            }
            persons = getPersons(semanticGraph, next, sentence);
        }

        // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
        StringBuilder query = new StringBuilder();
        query.append(token.lemma());
        query.append("+v+indic+").append(tempo);
        query.append("+nil+");

        // Add person
        // NOTE(review): last() throws NoSuchElementException on an empty set — confirm
        // getPersons never returns one.
        query.append(persons.last());

        String find = query.toString();

        System.out.println(find);
        String inverseMorphology = dm.getInverseMorphology(find);

        System.out.println(inverseMorphology);
        System.out.println(morpho);
        System.out.println(tempo);
        System.out.println(persons);

        return null;
    }

Source File: CoreNLPPosTagger.java From Heracles with GNU General Public License v3.0

4 votes

/**
	 * Process the Dataset in chunks, as defined by the <code>spanType</code> parameter.
	 * The Spans denoted by spanType must each contain Words belonging to a single sentence.
	 *
	 * <p>Each word receives a <code>"pos"</code> annotation: <code>"NNP"</code> when the word
	 * has a <code>"URI"</code> annotation, otherwise the tag produced by the CoreNLP tagger.
	 * Processing is aborted with an error when the dataset lacks any prerequisite task.
	 *
	 * @param dataset the dataset whose words are tagged
	 * @param spanTypeOfSentenceUnit the span type used to select the chunks to process
	 */
	@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit){
		//check if prerequisites are satisfied
		if (!dataset.getPerformedNLPTasks().containsAll(prerequisites)){
			HashSet<NLPTask> missingTasks = new HashSet<>();
			missingTasks.addAll(prerequisites);
			missingTasks.removeAll(dataset.getPerformedNLPTasks());
			Framework.error("This dataset does not meet the requirements to use this component! Missing tasks: " + missingTasks);
			return;
		}
		
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "pos");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){
			// wordIndex is filled by the helper; it is read below keyed by begin character offset
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
			if (a == null){
				// Previously this only printed "null" and then passed the null
				// annotation on to the pipeline; report it and skip the span instead.
				Framework.error("Could not reconstruct CoreNLP annotations for span: " + span);
				continue;
			}
			pipeline.annotate(a);
			List<CoreMap> sentenceAnnotations = a.get(SentencesAnnotation.class);
			for (CoreMap sentence : sentenceAnnotations){
				for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
					Word w = wordIndex.get(token.get(CharacterOffsetBeginAnnotation.class));
					String tempPos = token.get(PartOfSpeechAnnotation.class);
					// Words carrying a URI annotation are always tagged as proper nouns.
					if (w.hasAnnotation("URI")){
						w.putAnnotation("pos", "NNP");
					} else {
						w.putAnnotation("pos", tempPos);
					}
				}
			}
		}		
	}

Source File: CoreNLPToJSON.java From phrasal with GNU General Public License v3.0

4 votes

/**
 * Process an English text file.
 *
 * <p>Every input line must be split by CoreNLP into exactly one sentence and must
 * tokenize to the same number of tokens under both CoreNLP and the
 * {@code EnglishPreprocessor}; otherwise a {@code RuntimeException} is thrown. The
 * resulting segments are written to standard output as JSON.
 *
 * @param args {@code args[0]} is the text file; optional {@code args[1]} is an
 *             input-properties string
 * @throws IOException if reading the input file fails
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.printf("Usage: java %s file [inputproperties_str] > json_output%n", CoreNLPToJSON.class.getName());
    System.exit(-1);
  }
  String textFile = args[0];
  InputProperties inputProperties = args.length > 1 ? InputProperties.fromString(args[1]) : new InputProperties();

  // NOTE(review): `properties` is not declared in this method; presumably a
  // class-level field — confirm.
  StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);
  
  // Configure tokenizer
  EnglishPreprocessor preprocessor = new EnglishPreprocessor(true);
  
  // Use a map with ordered keys so that the output is ordered by segmentId.
  Map<Integer,SourceSegment> annotations = new TreeMap<Integer,SourceSegment>();
  LineNumberReader reader = IOTools.getReaderFromFile(textFile);
  int numLines;
  try {
    for (String line; (line = reader.readLine()) != null;) {
      Annotation annotation = coreNLP.process(line);
      List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
      if (sentences.size() != 1) {
        throw new RuntimeException("Sentence splitting on line: " + reader.getLineNumber());
      }
      CoreMap sentence = sentences.get(0);
      Tree tree = sentence.get(TreeAnnotation.class);
      tree.indexLeaves();
      int[] chunkVector = getChunkVector(tree);
      List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
      int numTokens = tokens.size();
      SymmetricalWordAlignment alignment = preprocessor.processAndAlign(line);
      if (alignment.e().size() != numTokens) {
        throw new RuntimeException(String.format("Tokenizer configurations differ: %d/%d", alignment.e().size(), numTokens));
      }
      SourceSegment segment = new SourceSegment(numTokens);
      segment.layoutSpec.addAll(makeLayoutSpec(alignment));
      segment.inputProperties = inputProperties.toString();
      for (int j = 0; j < numTokens; ++j) {
        CoreLabel token = tokens.get(j);
        String word = token.get(TextAnnotation.class);
        segment.tokens.add(unescape(word));
        String pos = mapPOS(token.get(PartOfSpeechAnnotation.class));
        segment.pos.add(pos);
        String ne = token.get(NamedEntityTagAnnotation.class);
        segment.ner.add(ne);
        segment.chunkVector[j] = chunkVector[j];
      }
      // getLineNumber() is already past the line just read, hence the -1 for a 0-based id.
      annotations.put(reader.getLineNumber()-1, segment);
    }
    numLines = reader.getLineNumber();
  } finally {
    // Close the reader even when a line fails validation or annotation throws.
    reader.close();
  }
  System.err.printf("Processed %d sentences%n", numLines);
  
  final SourceDocument jsonDocument = new SourceDocument(textFile, annotations);
  
  // Convert to json
  Gson gson = new Gson();
  String json = gson.toJson(jsonDocument);
  System.out.println(json);
}

Source File: Readability.java From tint with GNU General Public License v3.0

4 votes

/**
 * Feeds one token into the readability statistics.
 *
 * <p>The token's content-word and literal-word flags are reset to {@code false} and
 * {@code addingToken} is invoked for every token. For tokens whose POS counts as a
 * word, the word/letter/hyphenation counters are updated and the content-word and
 * easy-word hooks are invoked where the POS qualifies. A token left without a
 * hyphenation annotation gets its original text as hyphenation. Finally the POS and
 * generic POS statistics are updated.
 *
 * @param token the token to add
 */
public void addWord(CoreLabel token) {
        token.set(ReadabilityAnnotations.ContentWord.class, false);
        token.set(ReadabilityAnnotations.LiteralWord.class, false);

        String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String surface = token.word();

        addingToken(token);

        if (isWordPos(posTag)) {
            addingWord(token);
            wordCount++;
            docLenLettersOnly += token.endPosition() - token.beginPosition();

            surface = flattenToAscii(surface);
            Hyphenation syllables = hyphenator.hyphenate(surface);

            boolean hyphenated = false;
            if (syllables != null) {
                try {
                    String rendered = syllables.toString();
                    incrementHyphenCount(syllables.length() + 1);
                    token.set(ReadabilityAnnotations.HyphenationAnnotation.class, rendered);
                    hyphenated = true;
                    hyphenWordCount++;
                } catch (Exception e) {
                    // ignored
                }
            }

            // Short words without a hyphenation result count as a single unit.
            if (!hyphenated && surface.length() < 5) {
                incrementHyphenCount(1);
                hyphenWordCount++;
            }

            if (isContentPos(posTag)) {
                contentWordSize++;
                addingContentWord(token);
            }
            if (isEasyPos(posTag)) {
                contentEasyWordSize++;
                addingEasyWord(token);
            }
        }

        // Fall back to the original text when no hyphenation was stored above.
        boolean missingHyphenation =
                token.get(ReadabilityAnnotations.HyphenationAnnotation.class) == null;
        if (missingHyphenation) {
            token.set(ReadabilityAnnotations.HyphenationAnnotation.class, token.originalText());
        }

        String simplePos = getGenericPos(posTag);
        posStats.add(posTag);
        genericPosStats.add(simplePos);
    }

Source File: POSExample.java From core-nlp-example with MIT License

4 votes

/**
 * Annotates a fixed sample text with the shared pipeline and prints each token's
 * original text followed by its part-of-speech tag.
 *
 * @param args unused
 */
public static void main(String[] args) {

        StanfordCoreNLP nlp = Pipeline.getPipeline();

        String sample = "Hey! I am Dinesh Krishnan.";
        CoreDocument doc = new CoreDocument(sample);
        nlp.annotate(doc);

        List<CoreLabel> tokens = doc.tokens();
        for (CoreLabel tok : tokens) {
            String posTag = tok.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            System.out.println(tok.originalText() + " = "+ posTag);
        }
    }

Source File: CorenlpPipeline.java From datashare with GNU Affero General Public License v3.0

4 votes

/**
 * Process with entire pipelines
 *
 * <p>Adds one SENTENCE annotation per sentence, one TOKEN and one POS annotation per
 * token, and one NER annotation per maximal run of consecutive tokens sharing the same
 * category other than {@code NONE}. Runs do not cross sentence boundaries.
 *
 * @param input    the string to annotator
 * @param hash     the input hash code
 * @param language the input language
 * @return the annotations collected from the CoreNLP output
 */
private Annotations processPipeline(String input, String hash, Language language) throws InterruptedException {
    Annotations annotations = new Annotations(hash, getType(), language);

    // CoreNLP annotations data-structure
    edu.stanford.nlp.pipeline.Annotation coreNlpAnnotation = new edu.stanford.nlp.pipeline.Annotation(input);

    LOGGER.info("sentencing ~ tokenizing ~ POS-tagging ~ name-finding for " + language.toString());

    // Sentencize input
    // Tokenize
    // Pos-tag
    // NER
    CoreNlpPipelineModels.getInstance().get(language).annotate(coreNlpAnnotation);
    // Feed annotations
    List<CoreMap> sentences = coreNlpAnnotation.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        int sentenceBegin = sentence.get(CharacterOffsetBeginAnnotation.class);
        int sentenceEnd = sentence.get(CharacterOffsetEndAnnotation.class);
        annotations.add(SENTENCE, sentenceBegin, sentenceEnd);

        int nerBegin = 0;
        int lastTokenEnd = sentenceBegin;
        NamedEntity.Category prevCat = NamedEntity.Category.NONE;

        List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            int tokenBegin = token.get(CharacterOffsetBeginAnnotation.class);
            int tokenEnd = token.get(CharacterOffsetEndAnnotation.class);
            // For now the POS tag itself is not used; only its span is recorded.
            annotations.add(TOKEN, tokenBegin, tokenEnd);
            annotations.add(POS, tokenBegin, tokenEnd);

            String cat = token.get(NamedEntityTagAnnotation.class);
            NamedEntity.Category currCat = NamedEntity.Category.parse(cat);
            if (prevCat != currCat) {
                // Category changed: close the run that just ended (also when it is
                // directly followed by a run of another category, which used to be
                // dropped). The end stays at the next token's begin, as before.
                if (prevCat != NamedEntity.Category.NONE) {
                    annotations.add(NER, nerBegin, tokenBegin, prevCat);
                }
                if (currCat != NamedEntity.Category.NONE) {
                    nerBegin = tokenBegin;
                }
            }
            prevCat = currCat;
            lastTokenEnd = tokenEnd;
        }

        // Close a run that extends to the last token of the sentence; it used to be lost.
        if (prevCat != NamedEntity.Category.NONE) {
            annotations.add(NER, nerBegin, lastTokenEnd, prevCat);
        }
    }
    return annotations;
}

Source File: NERExample.java From core-nlp-example with MIT License

3 votes

/**
 * Annotates a fixed sample text with the shared pipeline and prints each token's
 * original text followed by its named-entity tag.
 *
 * @param args unused
 */
public static void main(String[] args)
    {

        StanfordCoreNLP nlp = Pipeline.getPipeline();

        String sample = "Hey! My  name is  Krishnan and I have friend his name is Robert." +
                " We both are living in Berlin";
        CoreDocument doc = new CoreDocument(sample);
        nlp.annotate(doc);

        List<CoreLabel> tokens = doc.tokens();
        for (CoreLabel tok : tokens) {
            String nerTag = tok.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            System.out.println(tok.originalText() + " = "+ nerTag);
        }
}

Java Code Examples for edu.stanford.nlp.ling.CoreLabel#get()