Java Code Examples for edu.stanford.nlp.ling.IndexedWord#setOriginalText()
The following examples show how to use
edu.stanford.nlp.ling.IndexedWord#setOriginalText().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0 | 5 votes |
/**
 * Replaces the current relation with a fresh "is-a" relation: a phrase that
 * contains exactly one synthetic word "is" (lemma "be"), which is also the
 * root of the phrase.
 **/
public void setIsARelation() {
    final String surface = "is";

    // Synthetic copula token; it does not come from the sentence, hence the negative index.
    // Setter order is kept on purpose: the word is set before the lemma.
    IndexedWord copula = new IndexedWord();
    copula.setWord(surface);
    copula.setOriginalText(surface);
    copula.setTag(POS_TAG.VBZ);
    copula.setNER(NE_TYPE.NO_NER);
    copula.setLemma("be");
    copula.setValue(surface);
    copula.setIndex(-2);

    AnnotatedPhrase isARelation = new AnnotatedPhrase();
    isARelation.addWordToList(copula);
    isARelation.setRoot(copula);
    this.rel = isARelation;
}
Example 2
Source File: MinIE.java From minie with GNU General Public License v3.0 | 4 votes |
/** * Process possessives in the object. * If we have ("SUBJ", "REL", "NP_1 POS NP_2"), then: ("SUBJ", "REL + NP_1 + of", "NP_2") * @param prop: proposition (list of annotated phrases) */ public void processPoss(ObjectArrayList<AnnotatedPhrase> prop){ // If there's no object (clause type SV), return if (prop.size() < 3) return; AnnotatedPhrase object = prop.get(2); AnnotatedPhrase rel = prop.get(1); TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_NP_POS_NP); TokenSequenceMatcher tMatcher = tPattern.getMatcher(object.getWordCoreLabelList()); int posIndex = -1; while (tMatcher.find()){ List<CoreMap> match = tMatcher.groupNodes(); // Check if the first/last word of the match is the first/last word of the object CoreLabel firstWord = new CoreLabel(match.get(0)); CoreLabel lastWord = new CoreLabel(match.get(match.size() - 1)); boolean check = false; if (firstWord.index() == object.getWordList().get(0).index()){ if (lastWord.index() == object.getWordList().get(object.getWordList().size() - 1).index()){ check = true; } } if (!check) break; for (CoreMap cm: match){ CoreLabel cl = new CoreLabel(cm); if (cl.tag().equals(POS_TAG.POS) && (cl.ner().equals(NE_TYPE.NO_NER))){ posIndex = object.getWordCoreLabelList().indexOf(cl); break; } } } if (posIndex > -1){ IndexedWord of = new IndexedWord(); of.setOriginalText("of"); of.setLemma("of"); of.setWord("of"); of.setTag("IN"); of.setNER("O"); of.setIndex(-1); ObjectArrayList<IndexedWord> pushedWords = new ObjectArrayList<>(); object.removeWordFromList(posIndex); for (int i = posIndex; i < object.getWordList().size(); i++){ pushedWords.add(object.getWordList().get(i)); } rel.addWordsToList(pushedWords); rel.addWordToList(of); object.removeWordsFromList(pushedWords); } }
Example 3
Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0 | 4 votes |
/** If ORG+ POS? NP PERSON+ => "PERSON" "is NP of" "ORG" (if there are , and or -> make multiple extractions) **/ public void extractPersonIsNPOfOrg() { // Reusable variables ObjectArrayList<AnnotatedPhrase> tempProp = new ObjectArrayList<>(); ObjectArrayList<AnnotatedPhrase> subjects = new ObjectArrayList<>(); IndexedWord subjRoot; IndexedWord objRoot; this.tPattern = TokenSequencePattern.compile(REGEX.T_ORG_NP_PERSON); this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence)); while (this.tMatcher.find()){ // Set the relation to be "is-a" relation this.setIsARelation(); for (IndexedWord w: CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) { if (w.ner().equals(NE_TYPE.PERSON)) this.subj.addWordToList(w); else if (w.ner().equals(NE_TYPE.ORGANIZATION)) this.obj.addWordToList(w); else if (w.tag().equals(POS_TAG.POS)) continue; else if (w.lemma().equals(CHARACTER.COMMA) || w.lemma().equals("and") || w.lemma().equals("or")) { subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList()); subjects.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot)); this.subj.clear(); } else this.rel.addWordToList(w); } subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList()); subjects.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot)); objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList()); IndexedWord ofWord = new IndexedWord(); ofWord.setWord("of"); ofWord.setOriginalText("of"); ofWord.setTag(POS_TAG.IN); ofWord.setNER(NE_TYPE.NO_NER); ofWord.setLemma("of"); ofWord.setValue("of"); ofWord.setIndex(-2); this.rel.addWordToList(ofWord); for (AnnotatedPhrase subject: subjects) { // Add the subj/rel/obj to the temporary proposition and then to the real propositions subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, subject.getWordList()); tempProp.add(new 
AnnotatedPhrase(subject.getWordList(), subjRoot)); tempProp.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot())); tempProp.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot)); this.propositions.add(new AnnotatedProposition(tempProp.clone(), new Attribution())); tempProp.clear(); } // Clean the variables this.subj.clear(); this.obj.clear(); this.rel.clear(); } }
Example 4
Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0 | 4 votes |
/** If (NP+ PERSON) => "PERSON" "is" "NP" **/ public void extractNounPerson() { // Reusable variables ObjectArrayList<AnnotatedPhrase> tempProp = new ObjectArrayList<>(); IndexedWord subjRoot; IndexedWord objRoot; // Set the relation to be "is-a" relation this.setIsARelation(); this.tPattern = TokenSequencePattern.compile(REGEX.T_NP_PERSON); this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence)); while (this.tMatcher.find()){ for (IndexedWord w: CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) { if (w.ner().equals(NE_TYPE.PERSON)) { this.subj.addWordToList(w); } else { if (w.lemma().toLowerCase().equals("mrs.") || w.lemma().toLowerCase().equals("ms.") || w.lemma().toLowerCase().equals("mrs") || w.lemma().toLowerCase().equals("ms")) { IndexedWord female = new IndexedWord(); female.setWord("female"); female.setOriginalText("female"); female.setTag(POS_TAG.NN); female.setNER(NE_TYPE.NO_NER); female.setLemma("female"); female.setValue("female"); female.setIndex(-2); this.obj.addWordToList(female); } else if (w.lemma().toLowerCase().equals("mr.") || w.lemma().toLowerCase().equals("mr")) { IndexedWord male = new IndexedWord(); male.setWord("male"); male.setOriginalText("male"); male.setTag(POS_TAG.NN); male.setNER(NE_TYPE.NO_NER); male.setLemma("male"); male.setValue("male"); male.setIndex(-2); this.obj.addWordToList(male); } else if (Polarity.NEG_WORDS.contains(w.lemma().toLowerCase())) { continue; } else { this.obj.addWordToList(w); } } } // Add the subj/rel/obj to the temporary proposition and then to the real propositions subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList()); objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList()); tempProp.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot)); tempProp.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot())); tempProp.add(new 
AnnotatedPhrase(this.obj.getWordList().clone(), objRoot)); this.propositions.add(new AnnotatedProposition(tempProp.clone(), new Attribution())); // Clean the variables tempProp.clear(); this.subj.clear(); this.obj.clear(); } // Clear the relation this.rel.clear(); }