Java Code Examples for edu.stanford.nlp.ling.CoreLabel#get()
The following examples show how to use
edu.stanford.nlp.ling.CoreLabel#get().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Postprocess.java From phrases with Apache License 2.0 | 6 votes |
/**
 * Runs a CoreNLP pipeline (tokenize, ssplit, pos, lemma, parse, sentiment) over the
 * sentences of every given pattern.
 *
 * <p>The predicted sentiment class of each sentence and the lemma of each token are
 * read but not stored anywhere; the method is effectively a stub.
 *
 * @param patterns the patterns whose sentences are annotated
 * @return always {@code null}
 */
public List<Pattern> run(List<Pattern> patterns) {
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
    StanfordCoreNLP nlp = new StanfordCoreNLP(config);

    for (Pattern current : patterns) {
        Annotation annotated = nlp.process(current.toSentences());
        List<CoreMap> sentenceMaps = annotated.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentenceMap : sentenceMaps) {
            Tree sentimentTree = sentenceMap.get(SentimentCoreAnnotations.AnnotatedTree.class);
            int predictedClass = RNNCoreAnnotations.getPredictedClass(sentimentTree);

            List<CoreLabel> labels = sentenceMap.get(CoreAnnotations.TokensAnnotation.class);
            for (CoreLabel label : labels) {
                String lemmaText = label.get(CoreAnnotations.LemmaAnnotation.class);
            }
        }
    }
    return null;
}
Example 2
Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
private static void usingStanfordNER() { String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz"; CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model); String sentence = ""; for (String element : sentences) { sentence += element; } List<List<CoreLabel>> entityList = classifier.classify(sentence); for (List<CoreLabel> internalList : entityList) { for (CoreLabel coreLabel : internalList) { String word = coreLabel.word(); String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class); // System.out.println(word + ":" + category); if (!"O".equals(category)) { System.out.println(word + ":" + category); } } } }
Example 3
Source File: ItalianReadability.java From tint with GNU General Public License v3.0 | 6 votes |
/**
 * Registers a {@code DescriptionForm} for a glossary form found in the annotated text.
 *
 * <p>Nothing is added when {@code start} has no entry in {@code indexes} or when
 * {@code form} has no entry in {@code glossario}.
 *
 * @param form           key looked up in {@code glossario}
 * @param indexes        map consulted with {@code start} to obtain the first token index
 * @param start          key looked up in {@code indexes}
 * @param numberOfTokens number of tokens the form spans
 * @param forms          target map; the new entry is keyed by its begin character offset
 * @param annotation     annotation providing the token list
 * @param glossario      glossary entries by form
 */
static public void addDescriptionForm(String form, HashMap<Integer, Integer> indexes, int start,
        int numberOfTokens, TreeMap<Integer, DescriptionForm> forms, Annotation annotation,
        HashMap<String, GlossarioEntry> glossario) {
    Integer firstIndex = indexes.get(start);
    if (firstIndex == null) {
        return;
    }

    int lastIndex = firstIndex + numberOfTokens - 1;
    CoreLabel headToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(firstIndex);
    CoreLabel tailToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(lastIndex);

    Integer spanBegin = headToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    Integer spanEnd = tailToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

    GlossarioEntry entry = glossario.get(form);
    if (entry != null) {
        forms.put(spanBegin, new DescriptionForm(spanBegin, spanEnd, entry));
    }
}
Example 4
Source File: ItalianReadability.java From tint with GNU General Public License v3.0 | 6 votes |
/**
 * Tags a content word with a difficulty level and updates the per-level counters.
 *
 * <p>The token starts at level 4. The easy-word sets for levels 3, 2 and 1 are then checked
 * in that order; each set containing the lemma increments its counter and overwrites the
 * level, so the last matching check determines the stored level.
 *
 * @param token the content-word token to classify
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    HashMap<Integer, HashMultimap<String, String>> easyByLevel = model.getEasyWords();
    String genericPos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    String tokenLemma = token.get(CoreAnnotations.LemmaAnnotation.class);

    // Default difficulty; lowered below whenever an easy-word set contains the lemma.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    if (easyByLevel.get(3).get(genericPos).contains(tokenLemma)) {
        level3WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
    }

    if (easyByLevel.get(2).get(genericPos).contains(tokenLemma)) {
        level2WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
    }

    if (easyByLevel.get(1).get(genericPos).contains(tokenLemma)) {
        level1WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
    }
}
Example 5
Source File: NerWithDepartmentTest.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
public static List<String> extractNER(String doc){ Annotation document = new Annotation(doc); pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); List<String> result = new ArrayList<String>(); for(CoreMap sentence: sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods for (CoreLabel token: sentence.get(CoreAnnotations.TokensAnnotation.class)) { // this is the text of the token String word = token.get(CoreAnnotations.TextAnnotation.class); // this is the NER label of the token String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class); result.add(ne); System.out.println(word + "\t" + ne); } } return result; }
Example 6
Source File: CoreNlpToken.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Advances to the next label of the underlying iterator and copies its text,
 * part-of-speech tag and begin/end positions into this object.
 *
 * @return this same instance, now describing the new label
 */
@Override
public CoreNlpToken next() {
    CoreLabel current = iterator.next();

    text = current.get(CoreAnnotations.TextAnnotation.class);
    nature = current.get(CoreAnnotations.PartOfSpeechAnnotation.class);

    begin = current.beginPosition();
    end = current.endPosition();

    return this;
}
Example 7
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that stopwords are properly annotated in the token list * @throws Exception */ @org.junit.Test public void testLuceneStopwordList() throws Exception { Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); //get the standard lucene stopword set Set<?> stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET; for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } //not checking lemma, so always false assertFalse(stopword.second()); } }
Example 8
Source File: Chapter8.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 5 votes |
/**
 * Annotates four sample sentences in one batch call, prints the pipeline timing
 * information and then the word / POS tag of every token of the second sentence.
 *
 * <p>The NER model path is hard-coded to a local Windows directory.
 */
private static void usingStanfordPipelineParallel() {
    Properties props = new Properties();
    // setProperty keeps the Properties object String-only, unlike the inherited put().
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    String path = "C:\\Current Books\\NLP and Java\\Downloads\\stanford-ner-2014-10-26\\classifiers";
    props.setProperty("ner.model", path + "/english.muc.7class.distsim.crf.ser.gz");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation1 = new Annotation("The robber took the cash and ran.");
    Annotation annotation2 = new Annotation("The policeman chased him down the street.");
    Annotation annotation3 = new Annotation("A passerby, watching the action, tripped the thief as he passed by.");
    Annotation annotation4 = new Annotation("They all lived happily everafter, except for the thief of course.");

    // Typed list (no raw ArrayList), declared by interface.
    List<Annotation> list = new ArrayList<>();
    list.add(annotation1);
    list.add(annotation2);
    list.add(annotation3);
    list.add(annotation4);

    // Typing the argument as Iterable selects the batch annotate overload.
    Iterable<Annotation> iterable = list;
    pipeline.annotate(iterable);

    System.out.println("Total time: " + pipeline.timingInformation());

    List<CoreMap> sentences = annotation2.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.println("Word: " + word + " POS Tag: " + pos);
        }
    }
}
Example 9
Source File: CoreNlpTokenizer.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
@Override public boolean incrementToken() { clearAttributes(); while (tokens == null || !tokens.hasNext()) if (!getNextSentence()) return false; CoreLabel token = tokens.next(); // Use the lemmatized word: String word = token.get(LemmaAnnotation.class); if (word == null) { // Fallback when no lemmatization happens. word = token.get(TextAnnotation.class); } termAttribute.setLength(0); termAttribute.append(word); // NER or part of speech annotation String pos = token.get(NamedEntityTagAnnotation.class); pos = (pos == null || "O".equals(pos)) ? token.get(PartOfSpeechAnnotation.class) : pos; typeAttribute.setType(pos != null ? pos : TypeAttribute.DEFAULT_TYPE); // Token character offsets int be = token.get(CharacterOffsetBeginAnnotation.class).intValue(); int en = token.get(CharacterOffsetEndAnnotation.class).intValue(); offsetAttribute.setOffset(be, en); // Token in-document position increment: positionAttribute.setPositionIncrement(1 + skippedTokens); skippedTokens = 0; return true; }
Example 10
Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License | 5 votes |
public Word[] getTaggedWords (String sentence) { CoreMap taggedSentence = getPOS(sentence); Word[] ret = new Word[taggedSentence.get(TokensAnnotation.class).size()]; int count = 0; for (CoreLabel token : taggedSentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); // this is the POS tag of the token String pos = token.get(PartOfSpeechAnnotation.class); //System.out.println(word+"["+pos+"]"); ret[count] = new Word(getBaseFormOfPattern(word.toLowerCase()), word, pos, count+1); count ++; } return ret; }
Example 11
Source File: CoreNlpExample.java From core-nlp-example with MIT License | 5 votes |
public static void main(String[] args) { // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable String text = "What is the Weather in Bangalore right now?"; // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { // this is the text of the token String word = token.get(CoreAnnotations.TextAnnotation.class); // this is the POS tag of the token String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); // this is the NER label of the token String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class); System.out.println(String.format("Print: word: [%s] pos: [%s] ne: [%s]", word, pos, ne)); } } }
Example 12
Source File: DigiCompMorphAnnotator.java From tint with GNU General Public License v3.0 | 5 votes |
/**
 * Stores, for every token, the morphological analyses that start with the token's lemma
 * followed by {@code "+"} or {@code "~"}.
 *
 * <p>When the token has more than one space-separated analysis, the (possibly empty) list
 * of matches is always stored. With a single analysis, a one-element list is stored only
 * if that analysis matches. Does nothing if the annotation has no sentences key.
 *
 * @param annotation the document annotation to enrich in place
 */
@Override
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> labels = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel label : labels) {
            String[] analyses = label.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
            String lemma = label.get(CoreAnnotations.LemmaAnnotation.class);
            String plusPrefix = lemma + "+";
            String tildePrefix = lemma + "~";

            if (analyses.length > 1) {
                List<String> matching = new ArrayList<>();
                for (String analysis : analyses) {
                    if (analysis.startsWith(plusPrefix) || analysis.startsWith(tildePrefix)) {
                        matching.add(analysis);
                    }
                }
                label.set(DigiMorphAnnotations.MorphoCompAnnotation.class, matching);
                continue;
            }

            String only = analyses[0];
            if (only.startsWith(plusPrefix) || only.startsWith(tildePrefix)) {
                label.set(DigiMorphAnnotations.MorphoCompAnnotation.class,
                        new ArrayList<String>(Arrays.asList(only)));
            }
        }
    }
}
Example 13
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that the custom stopword list words * @throws Exception */ @org.junit.Test public void testCustomStopwordList() throws Exception { //setup coreNlp properties for stopwords. Note the custom stopword list property Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList); //get the custom stopword set Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } //not checking lemma, so always false assertFalse(stopword.second()); } }
Example 14
Source File: ReplaceSubordinateRule.java From tint with GNU General Public License v3.0 | 4 votes |
/**
 * Experimental rule that builds an indicative-mood morphology query for a verb of the
 * first sentence and prints the result of the inverse morphology lookup.
 *
 * <p>Only the first sentence of {@code annotation} is inspected. The method prints its
 * intermediate values to standard output and returns {@code null} on every path.
 *
 * @param annotation annotated document; its first sentence must carry basic dependencies
 * @param children   not used by the active code in this method
 * @return always {@code null}
 */
@Override
public String apply(Annotation annotation, Map<Integer, HashMultimap<Integer, Integer>> children) {
    InverseDigiMorph dm = new InverseDigiMorph();
    // Zero-based index of the starting token; graph nodes are looked up with conj + 1.
    int conj = 0;
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap sentence = sentences.get(0);
    //
    SemanticGraph semanticGraph = sentence
            .get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    IndexedWord node = semanticGraph.getNodeByIndex(conj + 1);
    // NOTE(review): getHistory is defined elsewhere; presumably the chain of governors of node - confirm.
    List<IndexedWord> history = getHistory(semanticGraph, node);
    if (history.size() == 1) {
        return null;
    }
    // The second history element is treated as the candidate verb.
    IndexedWord verb = history.get(1);
    // verb.index() is converted to a zero-based token list position.
    CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(verb.index() - 1);
    String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    // Give up unless the POS tag starts with "V".
    if (!pos.startsWith("V")) {
        return null;
    }
    // todo: check subject in parse tree
    // todo: check clitics
    String morpho = token.get(DigiMorphAnnotations.MorphoAnnotation.class);
    // Whitespace-separated analyses; element 0 is skipped by the loop below.
    String[] parts = morpho.split("\\s+");
    TreeSet<String> persons = new TreeSet<>();
    String tempo = null;
    for (int i = 1; i < parts.length; i++) {
        // Each analysis is a '+'-separated feature list.
        String[] vParts = parts[i].split("\\+");
        // Keep only analyses whose second field is "v" ...
        if (!vParts[1].equals("v")) {
            continue;
        }
        String modo = vParts[2];
        // ... and whose third field is "cong".
        if (!modo.equals("cong")) {
            continue;
        }
        // The last matching analysis determines tempo; fields 5 and 6 are collected as "a+b".
        tempo = vParts[3];
        persons.add(vParts[5] + "+" + vParts[6]);
    }
    IndexedWord next = null;
    // When the analyses do not yield exactly one person value, look further along the
    // history for the first element whose POS tag starts with "V" and derive persons from it.
    if (persons.size() != 1) {
        for (int i = 2; i < history.size(); i++) {
            if (history.get(i).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("V")) {
                next = history.get(i);
                break;
            }
        }
        // next is still null here when no such element was found.
        persons = getPersons(semanticGraph, next, sentence);
    }
    // Assemble the query: <lemma>+v+indic+<tempo>+nil+<last person value>.
    StringBuffer stringBuffer = new StringBuffer();
    stringBuffer.append(token.lemma());
    stringBuffer.append("+v+indic+").append(tempo);
    stringBuffer.append("+nil+");
    // Add person
    stringBuffer.append(persons.last());
    String find = stringBuffer.toString();
    System.out.println(find);
    String inverseMorphology = dm.getInverseMorphology(find);
    // Debug output of the lookup result and the intermediate values.
    System.out.println(inverseMorphology);
    System.out.println(morpho);
    System.out.println(tempo);
    System.out.println(persons);
    // System.out.println(annotation.get(UDPipeAnnotations.UDPipeOriginalAnnotation.class));
    // System.out.println(sentence.get(CoreAnnotations.TokensAnnotation.class).get(2)
    // .get(UDPipeAnnotations.FeaturesAnnotation.class));
    // System.out.println(token
    // .get(UDPipeAnnotations.FeaturesAnnotation.class));
    //
    // System.out.println(children.get(0).get(verb.index()));
    // System.out.println(children);
    // System.out.println(verb.get(UDPipeAnnotations.FeaturesAnnotation.class));
    // try {
    // System.out.println(JSONOutputter.jsonPrint(annotation));
    // } catch (IOException e) {
    // e.printStackTrace();
    // }
    // System.out.println(getHistory(semanticGraph, node));
    // System.out.println(semanticGraph.getOutEdgesSorted(node));
    // System.out.println(semanticGraph.getIncomingEdgesSorted(node));
    // System.out.println(node);
    return null;
}
Example 15
Source File: CoreNLPPosTagger.java From Heracles with GNU General Public License v3.0 | 4 votes |
/** * Process the Dataset in chunks, as defined by the <code>spanType</code> parameter. * The Spans denoted by spanType must each contain Words belonging to a single sentence. * */ @Override public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit){ // if (dataset.getPerformedNLPTasks().contains(getTask())){ // Framework.error("This dataset has already been tagged with POS."); // return; // } //check if prerequisites are satisfied if (!dataset.getPerformedNLPTasks().containsAll(prerequisites)){ HashSet<NLPTask> missingTasks = new HashSet<>(); missingTasks.addAll(prerequisites); missingTasks.removeAll(dataset.getPerformedNLPTasks()); Framework.error("This dataset does not meet the requirements to use this component! Missing tasks: " + missingTasks); return; } Properties prop1 = new Properties(); prop1.setProperty("annotators", "pos"); StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false); for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){ HashMap<Integer, Word> wordIndex = new HashMap<>(); Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex); if (a == null){ System.out.println(a); } pipeline.annotate(a); List<CoreMap> sentenceAnnotations = a.get(SentencesAnnotation.class); for (CoreMap sentence : sentenceAnnotations){ for (CoreLabel token: sentence.get(TokensAnnotation.class)) { Word w = wordIndex.get(token.get(CharacterOffsetBeginAnnotation.class)); String tempPos = token.get(PartOfSpeechAnnotation.class); if (w.hasAnnotation("URI")){ w.putAnnotation("pos", "NNP"); } else { w.putAnnotation("pos", tempPos); } // System.out.println(w.getAnnotations()); } } } }
Example 16
Source File: CoreNLPToJSON.java From phrasal with GNU General Public License v3.0 | 4 votes |
/** * Process an English text file. * * @param args * @throws IOException */ public static void main(String[] args) throws IOException { if (args.length < 1) { System.err.printf("Usage: java %s file [inputproperties_str] > json_output%n", CoreNLPToJSON.class.getName()); System.exit(-1); } String textFile = args[0]; InputProperties inputProperties = args.length > 1 ? InputProperties.fromString(args[1]) : new InputProperties(); StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties); // Configure tokenizer EnglishPreprocessor preprocessor = new EnglishPreprocessor(true); // Use a map with ordered keys so that the output is ordered by segmentId. Map<Integer,SourceSegment> annotations = new TreeMap<Integer,SourceSegment>(); LineNumberReader reader = IOTools.getReaderFromFile(textFile); for (String line; (line = reader.readLine()) != null;) { Annotation annotation = coreNLP.process(line); List<CoreMap> sentences = annotation.get(SentencesAnnotation.class); if (sentences.size() != 1) { throw new RuntimeException("Sentence splitting on line: " + String.valueOf(reader.getLineNumber())); } CoreMap sentence = sentences.get(0); Tree tree = sentence.get(TreeAnnotation.class); tree.indexLeaves(); int[] chunkVector = getChunkVector(tree); List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); int numTokens = tokens.size(); SymmetricalWordAlignment alignment = preprocessor.processAndAlign(line); if (alignment.e().size() != numTokens) { throw new RuntimeException(String.format("Tokenizer configurations differ: %d/%d", alignment.e().size(), numTokens)); } SourceSegment segment = new SourceSegment(numTokens); segment.layoutSpec.addAll(makeLayoutSpec(alignment)); segment.inputProperties = inputProperties.toString(); for (int j = 0; j < numTokens; ++j) { CoreLabel token = tokens.get(j); String word = token.get(TextAnnotation.class); segment.tokens.add(unescape(word)); String pos = mapPOS(token.get(PartOfSpeechAnnotation.class)); segment.pos.add(pos); String ne = 
token.get(NamedEntityTagAnnotation.class); segment.ner.add(ne); segment.chunkVector[j] = chunkVector[j]; } annotations.put(reader.getLineNumber()-1, segment); } reader.close(); System.err.printf("Processed %d sentences%n", reader.getLineNumber()); final SourceDocument jsonDocument = new SourceDocument(textFile, annotations); // Convert to json Gson gson = new Gson(); String json = gson.toJson(jsonDocument); System.out.println(json); }
Example 17
Source File: Readability.java From tint with GNU General Public License v3.0 | 4 votes |
public void addWord(CoreLabel token) { token.set(ReadabilityAnnotations.ContentWord.class, false); token.set(ReadabilityAnnotations.LiteralWord.class, false); String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); // String lemma = token.get(CoreAnnotations.LemmaAnnotation.class); String word = token.word(); addingToken(token); if (isWordPos(pos)) { addingWord(token); wordCount++; docLenLettersOnly += token.endPosition() - token.beginPosition(); word = flattenToAscii(word); Hyphenation hyphenation = hyphenator.hyphenate(word); boolean done = false; if (hyphenation != null) { try { String h = hyphenation.toString(); incrementHyphenCount(hyphenation.length() + 1); token.set(ReadabilityAnnotations.HyphenationAnnotation.class, h); done = true; hyphenWordCount++; } catch (Exception e) { // ignored } } if (!done && word.length() < 5) { incrementHyphenCount(1); hyphenWordCount++; } if (isContentPos(pos)) { contentWordSize++; addingContentWord(token); } if (isEasyPos(pos)) { contentEasyWordSize++; addingEasyWord(token); } } if (token.get(ReadabilityAnnotations.HyphenationAnnotation.class) == null) { token.set(ReadabilityAnnotations.HyphenationAnnotation.class, token.originalText()); } String genericPos = getGenericPos(pos); posStats.add(pos); genericPosStats.add(genericPos); }
Example 18
Source File: POSExample.java From core-nlp-example with MIT License | 4 votes |
/**
 * Annotates a fixed sample sentence and prints every token's original text together
 * with its part-of-speech tag.
 *
 * @param args unused
 */
public static void main(String[] args) {
    StanfordCoreNLP nlp = Pipeline.getPipeline();

    String sample = "Hey! I am Dinesh Krishnan.";
    CoreDocument document = new CoreDocument(sample);
    nlp.annotate(document);

    for (CoreLabel label : document.tokens()) {
        String tag = label.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        System.out.println(label.originalText() + " = "+ tag);
    }
}
Example 19
Source File: CorenlpPipeline.java From datashare with GNU Affero General Public License v3.0 | 4 votes |
/** * Process with entire pipelines * * @param input the string to annotator * @param hash the input hash code * @param language the input language * @return */ private Annotations processPipeline(String input, String hash, Language language) throws InterruptedException { Annotations annotations = new Annotations(hash, getType(), language); // CoreNLP annotations data-structure edu.stanford.nlp.pipeline.Annotation coreNlpAnnotation = new edu.stanford.nlp.pipeline.Annotation(input); LOGGER.info("sentencing ~ tokenizing ~ POS-tagging ~ name-finding for " + language.toString()); // Sentencize input // Tokenize // Pos-tag // NER CoreNlpPipelineModels.getInstance().get(language).annotate(coreNlpAnnotation); // Feed annotations List<CoreMap> sentences = coreNlpAnnotation.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { int sentenceBegin = sentence.get(CharacterOffsetBeginAnnotation.class); int sentenceEnd = sentence.get(CharacterOffsetEndAnnotation.class); annotations.add(SENTENCE, sentenceBegin, sentenceEnd); int nerBegin = 0; NamedEntity.Category prevCat = NamedEntity.Category.NONE; List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); for (CoreLabel token : tokens) { int tokenBegin = token.get(CharacterOffsetBeginAnnotation.class); int tokenEnd = token.get(CharacterOffsetEndAnnotation.class); String pos = token.get(PartOfSpeechAnnotation.class); // for now we don't use POS tagging annotations.add(TOKEN, tokenBegin, tokenEnd); annotations.add(POS, tokenBegin, tokenEnd); String cat = token.get(NamedEntityTagAnnotation.class); NamedEntity.Category currCat = NamedEntity.Category.parse(cat); if (currCat != NamedEntity.Category.NONE) { if (prevCat != currCat) { nerBegin = tokenBegin; } } else { if (prevCat != currCat) { annotations.add(NER, nerBegin, tokenBegin, prevCat); } } prevCat = currCat; } } return annotations; }
Example 20
Source File: NERExample.java From core-nlp-example with MIT License | 3 votes |
/**
 * Annotates a fixed sample text and prints every token's original text together with
 * its named-entity tag.
 *
 * @param args unused
 */
public static void main(String[] args) {
    StanfordCoreNLP nlp = Pipeline.getPipeline();

    String sample = "Hey! My name is Krishnan and I have friend his name is Robert." +
            " We both are living in Berlin";
    CoreDocument document = new CoreDocument(sample);
    nlp.annotate(document);

    List<CoreLabel> labels = document.tokens();
    for (CoreLabel label : labels) {
        String tag = label.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        System.out.println(label.originalText() + " = "+ tag);
    }

    /*
    List nameList = coreLabels
            .stream()
            .filter(coreLabel -> "Person".equalsIgnoreCase(coreLabel.get(CoreAnnotations.NamedEntityTagAnnotation.class)))
            .collect(Collectors.toList());
    System.out.println(nameList);
    */
}