edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation Java Examples
The following examples show how to use
edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CoreNlpTokenizer.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
@Override public boolean incrementToken() { clearAttributes(); while (tokens == null || !tokens.hasNext()) if (!getNextSentence()) return false; CoreLabel token = tokens.next(); // Use the lemmatized word: String word = token.get(LemmaAnnotation.class); if (word == null) { // Fallback when no lemmatization happens. word = token.get(TextAnnotation.class); } termAttribute.setLength(0); termAttribute.append(word); // NER or part of speech annotation String pos = token.get(NamedEntityTagAnnotation.class); pos = (pos == null || "O".equals(pos)) ? token.get(PartOfSpeechAnnotation.class) : pos; typeAttribute.setType(pos != null ? pos : TypeAttribute.DEFAULT_TYPE); // Token character offsets int be = token.get(CharacterOffsetBeginAnnotation.class).intValue(); int en = token.get(CharacterOffsetEndAnnotation.class).intValue(); offsetAttribute.setOffset(be, en); // Token in-document position increment: positionAttribute.setPositionIncrement(1 + skippedTokens); skippedTokens = 0; return true; }
Example #2
Source File: CoreNLPHelper.java From Heracles with GNU General Public License v3.0 | 4 votes |
public static Annotation reconstructStanfordAnnotations(Span sentenceSpan, HashMap<Integer, Word> wordIndex, boolean useWordOrderInsteadOfOffset){ String originalText = sentenceSpan.getAnnotation("text", String.class); Annotation a = new Annotation(originalText); a.set(TextAnnotation.class, originalText); //a.set(DocIDAnnotation.class, "document"); List<CoreMap> sentenceAnnotations = new ArrayList<CoreMap>(); a.set(SentencesAnnotation.class, sentenceAnnotations); List<CoreLabel> tokenAnnotations = new ArrayList<CoreLabel>(); a.set(TokensAnnotation.class, tokenAnnotations); ArrayCoreMap sentenceAnnotation = new ArrayCoreMap(); sentenceAnnotations.add(sentenceAnnotation); // int startOffset = sentenceSpan.first().getStartOffset(); for (Word w : sentenceSpan){ CoreLabel c = new CoreLabel(); c.set(TextAnnotation.class, w.getWord()); c.set(OriginalTextAnnotation.class, w.getWord()); c.set(ValueAnnotation.class, w.getWord()); c.set(CharacterOffsetBeginAnnotation.class, w.getStartOffset()); c.set(CharacterOffsetEndAnnotation.class, w.getEndOffset()); c.set(IndexAnnotation.class, w.getOrder()+1); // c.setIndex(w.getOrder()); c.set(SentenceIndexAnnotation.class, 0); // c.setSentIndex(0); c.set(DocIDAnnotation.class, "document"); c.setDocID("document"); if (w.hasAnnotation("pos")) c.set(PartOfSpeechAnnotation.class, w.getAnnotation("pos",String.class)); if (w.hasAnnotation("lemma")) c.set(LemmaAnnotation.class, w.getAnnotation("lemma", String.class)); if (w.hasAnnotation("nerLabel")) c.set(NamedEntityTagAnnotation.class, w.getAnnotation("nerLabel", String.class)); if (w.hasAnnotation("nerValue")) c.set(NormalizedNamedEntityTagAnnotation.class, w.getAnnotation("nerValue", String.class)); tokenAnnotations.add(c); if (useWordOrderInsteadOfOffset){ wordIndex.put(w.getOrder(), w); } else { wordIndex.put(w.getStartOffset(), w); } } //essential sentence annotation: TokensAnnotation sentenceAnnotation.set(TokensAnnotation.class, tokenAnnotations); //essential sentence annotation: TextAnnotation sentenceAnnotation.set(TextAnnotation.class, originalText); //essential sentence annotation: SentenceIndexAnnotation sentenceAnnotation.set(SentenceIndexAnnotation.class, 0); sentenceAnnotation.set(CharacterOffsetBeginAnnotation.class, 0); sentenceAnnotation.set(CharacterOffsetEndAnnotation.class, sentenceSpan.last().getEndOffset()); sentenceAnnotation.set(TokenBeginAnnotation.class, 0); sentenceAnnotation.set(TokenEndAnnotation.class, sentenceSpan.last().getOrder()); return a; }
Example #3
Source File: JsonPipeline.java From tac2015-event-detection with GNU General Public License v3.0 | 4 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) static void addEntityMentions(Map<String,Object> sent_info, CoreMap sentence) { List<CoreMap> coreMentions = sentence.get(MentionsAnnotation.class); List<Map> jsonMentions = new ArrayList<>(); /* trying to figure out the keys in each mention. here's a printout from one. MENTION August 2014 class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation August 2014 class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation 3 class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation 14 class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation [August-2, 2014-3] class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation 1 class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation 3 class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation DATE class edu.stanford.nlp.ling.CoreAnnotations$NormalizedNamedEntityTagAnnotation 2014-08 class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation DATE class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation 0 class edu.stanford.nlp.time.TimeAnnotations$TimexAnnotation <TIMEX3 tid="t1" type="DATE" value="2014-08">August 2014</TIMEX3> MENTION Barack Obama class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation Barack Obama class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation 17 class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation 29 class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation [Barack-5, Obama-6] class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation 4 class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation 6 class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation PERSON class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation PERSON class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation 0 MENTION Paris class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation Paris class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation 66 class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation 71 class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation [Paris-5] class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation 14 class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation 15 class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation LOCATION class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation LOCATION class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation 1 */ for (CoreMap mention : coreMentions) { // U.p("MENTION " + mention); // for (Class k : mention.keySet()) { // U.pf("%s\t%s\n", k, mention.get(k)); // } Map m = new HashMap<String, Object>(); m.put("tokspan", Lists.newArrayList( mention.get(TokenBeginAnnotation.class).intValue(), mention.get(TokenEndAnnotation.class).intValue())); m.put("charspan", Lists.newArrayList( mention.get(CharacterOffsetBeginAnnotation.class).intValue(), mention.get(CharacterOffsetEndAnnotation.class).intValue())); m.put("sentence", mention.get(SentenceIndexAnnotation.class).intValue()); String entityType = mention.get(EntityTypeAnnotation.class); m.put("type", entityType); if (mention.containsKey(NormalizedNamedEntityTagAnnotation.class)) { m.put("normalized", mention.get(NormalizedNamedEntityTagAnnotation.class)); } if (mention.containsKey(TimexAnnotation.class)) { m.put("timex_xml", mention.get(TimexAnnotation.class).toString()); } jsonMentions.add(m); } sent_info.put("entitymentions", jsonMentions); }