Java Code Examples for edu.stanford.nlp.ling.CoreLabel#set()
The following examples show how to use
edu.stanford.nlp.ling.CoreLabel#set().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GalicianReadability.java From tint with GNU General Public License v3.0 | 6 votes |
/**
 * Tags a content word with a difficulty level and updates the per-level counters.
 *
 * <p>The token is first tagged with level 4. The model's level 3, level 2 and level 1
 * lemma collections are then consulted in that order; every collection that contains the
 * token's lemma increments its own counter and overwrites the level, so the lowest
 * matching level is the one that remains on the token.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    // Default level for lemmas that appear in none of the model's lists.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);
    final String tokenLemma = token.lemma();

    final boolean inLevel3 = model.getLevel3Lemmas().contains(tokenLemma);
    if (inLevel3) {
        level3WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
    }

    final boolean inLevel2 = model.getLevel2Lemmas().contains(tokenLemma);
    if (inLevel2) {
        level2WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
    }

    final boolean inLevel1 = model.getLevel1Lemmas().contains(tokenLemma);
    if (inLevel1) {
        level1WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
    }
}
Example 2
Source File: ItalianReadability.java From tint with GNU General Public License v3.0 | 6 votes |
/**
 * Tags a content word with a difficulty level based on the model's easy-word tables.
 *
 * <p>The token is first tagged with level 4. The tables for levels 3, 2 and 1 are then
 * looked up in that order using the token's generic part of speech; each table that
 * lists the lemma increments the matching counter and overwrites the level, so the
 * lowest matching level is the one that remains on the token.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    HashMap<Integer, HashMultimap<String, String>> wordsByLevel = model.getEasyWords();
    String genericPos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    String tokenLemma = token.get(CoreAnnotations.LemmaAnnotation.class);

    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    // Walk from the hardest easy-word table down to the easiest one.
    for (int level = 3; level >= 1; level--) {
        if (!wordsByLevel.get(level).get(genericPos).contains(tokenLemma)) {
            continue;
        }
        switch (level) {
            case 3:
                level3WordSize++;
                break;
            case 2:
                level2WordSize++;
                break;
            default:
                level1WordSize++;
                break;
        }
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, level);
    }
}
Example 3
Source File: DigiCompMorphAnnotator.java From tint with GNU General Public License v3.0 | 5 votes |
/**
 * Attaches to each token the morphological analyses that belong to its own lemma.
 *
 * <p>For every token of every sentence, the space-separated
 * {@code MorphoAnnotation} string is split into features, and the features starting with
 * {@code lemma + "+"} or {@code lemma + "~"} are stored as a list under
 * {@code MorphoCompAnnotation}.
 *
 * <p>When the analysis holds more than one feature the list is always stored, even if it
 * is empty. When it holds a single feature the annotation is stored only if that feature
 * matches. Tokens with no morphological analysis, or one made only of separators, are
 * skipped instead of raising an exception.
 *
 * @param annotation the document annotation; nothing happens if it has no sentences
 */
@Override
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            String morpho = token.get(DigiMorphAnnotations.MorphoAnnotation.class);
            if (morpho == null) {
                // The key is absent on this token: there is nothing to filter.
                continue;
            }

            String[] morphFeatures = morpho.split(" ");
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            // Build the two accepted prefixes once per token rather than once per feature.
            String plusPrefix = lemma + "+";
            String tildePrefix = lemma + "~";

            List<String> comps = new ArrayList<>();
            for (String feature : morphFeatures) {
                if (feature.startsWith(plusPrefix) || feature.startsWith(tildePrefix)) {
                    comps.add(feature);
                }
            }

            // A single non-matching feature (or an empty split result) leaves the token
            // without the annotation; several features always store the list.
            if (morphFeatures.length > 1 || !comps.isEmpty()) {
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
            }
        }
    }
}
Example 4
Source File: DigiInverseMorphAnnotator.java From tint with GNU General Public License v3.0 | 5 votes |
/**
 * Copies the first morphological feature of each token into
 * {@code InverseMorphoAnnotation}.
 *
 * <p>The token's {@code MorphoAnnotation} string is split on single spaces and the first
 * piece is stored. Tokens with no morphological analysis, or one made only of
 * separators, are skipped instead of raising an exception.
 *
 * @param annotation the document annotation; nothing happens if it has no sentences
 */
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            String morpho = token.get(DigiMorphAnnotations.MorphoAnnotation.class);
            if (morpho == null) {
                // The key is absent on this token: there is nothing to copy.
                continue;
            }

            String[] morphFeatures = morpho.split(" ");
            if (morphFeatures.length == 0) {
                // split() drops trailing empty strings, so a separators-only value
                // yields an empty array.
                continue;
            }

            token.set(DigiInverseMorphAnnotations.InverseMorphoAnnotation.class, morphFeatures[0]);
        }
    }
}
Example 5
Source File: SpanishReadability.java From tint with GNU General Public License v3.0 | 5 votes |
@Override public void addingContentWord(CoreLabel token) { super.addingContentWord(token); token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4); String lemma = token.lemma(); if (model.getLevel3Lemmas().contains(lemma)) { level3WordSize++; token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3); } if (model.getLevel2Lemmas().contains(lemma)) { level2WordSize++; token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2); } if (model.getLevel1Lemmas().contains(lemma)) { level1WordSize++; token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1); } // System.out.println("Adding content word (lemma): " + lemma); // System.out.println(model.getLevel1Lemmas().contains(lemma)); // System.out.println(model.getLevel2Lemmas().contains(lemma)); // System.out.println(model.getLevel3Lemmas().contains(lemma)); // System.out.println(); // HashMap<Integer, HashMultimap<String, String>> easyWords = model.getEasyWords(); // String simplePos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class)); // String lemma = token.get(CoreAnnotations.LemmaAnnotation.class); // // if (easyWords.get(1).get(simplePos).contains(lemma)) { // level1WordSize++; // } // if (easyWords.get(2).get(simplePos).contains(lemma)) { // level2WordSize++; // } // if (easyWords.get(3).get(simplePos).contains(lemma)) { // level3WordSize++; // } }
Example 6
Source File: UPosAnnotator.java From tint with GNU General Public License v3.0 | 5 votes |
/**
 * Derives a universal POS string for every token from its part-of-speech tag.
 *
 * <p>The tag is split on {@code '+'}; each piece is mapped through {@code uposMap}
 * (falling back to {@code DEFAULT_UPOS}) and the mapped pieces are re-joined with
 * {@code '+'} and stored under {@code UPosAnnotation}.
 *
 * <p>Tokens without a part-of-speech tag are skipped. A tag made only of {@code '+'}
 * separators splits into no pieces and is stored as an empty string rather than
 * raising a {@link StringIndexOutOfBoundsException}.
 *
 * @param annotation the document annotation whose tokens are processed
 */
@Override
public void annotate(Annotation annotation) {
    for (CoreLabel token : annotation.get(CoreAnnotations.TokensAnnotation.class)) {
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        if (pos == null) {
            // No tag on this token: there is nothing to translate.
            continue;
        }

        String[] parts = pos.split("\\+");
        StringBuilder upos = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                // The separator goes between pieces only, so no leading '+' needs stripping.
                upos.append("+");
            }
            String thisPos = uposMap.getOrDefault(parts[i], DEFAULT_UPOS);
            upos.append(thisPos);
        }
        token.set(CustomAnnotations.UPosAnnotation.class, upos.toString());
    }
}
Example 7
Source File: ProcessorTools.java From phrasal with GNU General Public License v3.0 | 5 votes |
/**
 * Builds the {@link CoreLabel} that represents one labeled character.
 *
 * @param character   stored as both the text and the char annotation
 * @param label       stored as both the answer and the gold answer annotation
 * @param index       passed to {@code setIndex} on the new label
 * @param parentToken stored as the parent annotation
 * @param charIndex   stored as the character-offset-begin annotation
 * @return the newly created label
 */
private static CoreLabel createDatum(String character, String label, int index, String parentToken, int charIndex) {
    final CoreLabel datum = new CoreLabel();

    // The character itself, under both text-like keys.
    datum.set(CoreAnnotations.TextAnnotation.class, character);
    datum.set(CoreAnnotations.CharAnnotation.class, character);

    // The token this character came from.
    datum.set(CoreAnnotations.ParentAnnotation.class, parentToken);

    // The same label is used for the answer and the gold answer.
    datum.set(CoreAnnotations.AnswerAnnotation.class, label);
    datum.set(CoreAnnotations.GoldAnswerAnnotation.class, label);

    // Position information.
    datum.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charIndex);
    datum.setIndex(index);

    return datum;
}
Example 8
Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0 | 5 votes |
/**
 * Marks every token with a pair of stopword flags.
 *
 * <p>The first element of the pair tells whether the lower-cased word is in
 * {@code stopwords}. The second tells whether the lower-cased lemma is in
 * {@code stopwords}; it is only evaluated when {@code checkLemma} is set and is
 * {@code false} otherwise, or when the token has no lemma. The pair is stored on the
 * token under {@code StopwordAnnotator.class}.
 *
 * <p>Nothing happens when the stopword set is null or empty, or when the annotation
 * has no tokens.
 *
 * @param annotation the document annotation whose tokens are flagged
 */
@Override
public void annotate(Annotation annotation) {
    if (stopwords == null || stopwords.size() == 0
            || !annotation.containsKey(TokensAnnotation.class)) {
        return;
    }

    List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
    for (CoreLabel token : tokens) {
        boolean isWordStopword = stopwords.contains(token.word().toLowerCase());

        boolean isLemmaStopword = false;
        if (checkLemma) {
            // Test the lemma itself, not the surface word a second time.
            String lemma = token.lemma();
            isLemmaStopword = lemma != null && stopwords.contains(lemma.toLowerCase());
        }

        Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
        token.set(StopwordAnnotator.class, pair);
    }
}
Example 9
Source File: CoreNLPHelper.java From Heracles with GNU General Public License v3.0 | 4 votes |
public static Annotation reconstructStanfordAnnotations(Span sentenceSpan, HashMap<Integer, Word> wordIndex, boolean useWordOrderInsteadOfOffset){ String originalText = sentenceSpan.getAnnotation("text", String.class); Annotation a = new Annotation(originalText); a.set(TextAnnotation.class, originalText); //a.set(DocIDAnnotation.class, "document"); List<CoreMap> sentenceAnnotations = new ArrayList<CoreMap>(); a.set(SentencesAnnotation.class, sentenceAnnotations); List<CoreLabel> tokenAnnotations = new ArrayList<CoreLabel>(); a.set(TokensAnnotation.class, tokenAnnotations); ArrayCoreMap sentenceAnnotation = new ArrayCoreMap(); sentenceAnnotations.add(sentenceAnnotation); // int startOffset = sentenceSpan.first().getStartOffset(); for (Word w : sentenceSpan){ CoreLabel c = new CoreLabel(); c.set(TextAnnotation.class, w.getWord()); c.set(OriginalTextAnnotation.class, w.getWord()); c.set(ValueAnnotation.class, w.getWord()); c.set(CharacterOffsetBeginAnnotation.class, w.getStartOffset()); c.set(CharacterOffsetEndAnnotation.class, w.getEndOffset()); c.set(IndexAnnotation.class, w.getOrder()+1); // c.setIndex(w.getOrder()); c.set(SentenceIndexAnnotation.class, 0); // c.setSentIndex(0); c.set(DocIDAnnotation.class, "document"); c.setDocID("document"); if (w.hasAnnotation("pos")) c.set(PartOfSpeechAnnotation.class, w.getAnnotation("pos",String.class)); if (w.hasAnnotation("lemma")) c.set(LemmaAnnotation.class, w.getAnnotation("lemma", String.class)); if (w.hasAnnotation("nerLabel")) c.set(NamedEntityTagAnnotation.class, w.getAnnotation("nerLabel", String.class)); if (w.hasAnnotation("nerValue")) c.set(NormalizedNamedEntityTagAnnotation.class, w.getAnnotation("nerValue", String.class)); tokenAnnotations.add(c); if (useWordOrderInsteadOfOffset){ wordIndex.put(w.getOrder(), w); } else { wordIndex.put(w.getStartOffset(), w); } } //essential sentence annotation: TokensAnnotation sentenceAnnotation.set(TokensAnnotation.class, tokenAnnotations); //essential sentence annotation: 
TextAnnotation sentenceAnnotation.set(TextAnnotation.class, originalText); //essential sentence annotation: SentenceIndexAnnotation sentenceAnnotation.set(SentenceIndexAnnotation.class, 0); sentenceAnnotation.set(CharacterOffsetBeginAnnotation.class, 0); sentenceAnnotation.set(CharacterOffsetEndAnnotation.class, sentenceSpan.last().getEndOffset()); sentenceAnnotation.set(TokenBeginAnnotation.class, 0); sentenceAnnotation.set(TokenEndAnnotation.class, sentenceSpan.last().getOrder()); return a; }
Example 10
Source File: Readability.java From tint with GNU General Public License v3.0 | 4 votes |
/**
 * Flags the token as a content word by setting
 * {@code ReadabilityAnnotations.ContentWord} to {@code true}.
 *
 * @param token the token to flag
 */
public void addingContentWord(CoreLabel token) {
    token.set(ReadabilityAnnotations.ContentWord.class, Boolean.TRUE);
}
Example 11
Source File: Readability.java From tint with GNU General Public License v3.0 | 4 votes |
/**
 * Flags the token as a literal word by setting
 * {@code ReadabilityAnnotations.LiteralWord} to {@code true}.
 *
 * @param token the token to flag
 */
public void addingWord(CoreLabel token) {
    token.set(ReadabilityAnnotations.LiteralWord.class, Boolean.TRUE);
}
Example 12
Source File: Readability.java From tint with GNU General Public License v3.0 | 4 votes |
public void addWord(CoreLabel token) { token.set(ReadabilityAnnotations.ContentWord.class, false); token.set(ReadabilityAnnotations.LiteralWord.class, false); String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); // String lemma = token.get(CoreAnnotations.LemmaAnnotation.class); String word = token.word(); addingToken(token); if (isWordPos(pos)) { addingWord(token); wordCount++; docLenLettersOnly += token.endPosition() - token.beginPosition(); word = flattenToAscii(word); Hyphenation hyphenation = hyphenator.hyphenate(word); boolean done = false; if (hyphenation != null) { try { String h = hyphenation.toString(); incrementHyphenCount(hyphenation.length() + 1); token.set(ReadabilityAnnotations.HyphenationAnnotation.class, h); done = true; hyphenWordCount++; } catch (Exception e) { // ignored } } if (!done && word.length() < 5) { incrementHyphenCount(1); hyphenWordCount++; } if (isContentPos(pos)) { contentWordSize++; addingContentWord(token); } if (isEasyPos(pos)) { contentEasyWordSize++; addingEasyWord(token); } } if (token.get(ReadabilityAnnotations.HyphenationAnnotation.class) == null) { token.set(ReadabilityAnnotations.HyphenationAnnotation.class, token.originalText()); } String genericPos = getGenericPos(pos); posStats.add(pos); genericPosStats.add(genericPos); }