edu.stanford.nlp.ling.IndexedWord Java Examples

Source File: DpUtils.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Walks the outgoing edges reachable from {@code root} and collects in {@code edgesToRemove} every edge that
 * leads to an excluded vertex or carries an excluded relation. An excluded edge is not followed any further.
 *
 * @param graph the graph being traversed
 * @param root vertex whose outgoing edges are inspected
 * @param excludeVertexes dependents whose incoming edge is collected for removal
 * @param excludeRelations relations excluded at every depth of the traversal
 * @param excludeRelationsTop relations excluded only for the edges leaving this {@code root}; the recursive
 *     calls pass an empty set instead
 * @param edgesToRemove output collection that receives the edges to remove
 * @param exploredEdges edges visited so far; an edge already contained is skipped
 */
private static void subgraph(SemanticGraph graph, IndexedWord root, Collection<IndexedWord> excludeVertexes,
    Collection<GrammaticalRelation> excludeRelations, Collection<GrammaticalRelation> excludeRelationsTop,
    Collection<SemanticGraphEdge> edgesToRemove, Set<SemanticGraphEdge> exploredEdges) {
  for (SemanticGraphEdge outgoing : graph.getOutEdgesSorted(root)) {
    // add() returns false for an edge that was visited before; such an edge is not processed again.
    if (!exploredEdges.add(outgoing)) {
      continue;
    }
    IndexedWord dependent = outgoing.getDependent();
    GrammaticalRelation relation = outgoing.getRelation();
    boolean excluded = excludeVertexes.contains(dependent)
        || excludeRelations.contains(relation)
        || excludeRelationsTop.contains(relation)
        || containsRelationOrDescendant(excludeRelations, relation)
        || containsRelationOrDescendant(excludeRelationsTop, relation);
    if (!excluded) {
      // The "top" exclusions apply to the first level only, hence the empty set from here on.
      subgraph(graph, dependent, excludeVertexes, excludeRelations, Collections.<GrammaticalRelation>emptySet(),
          edgesToRemove, exploredEdges);
      continue;
    }
    edgesToRemove.add(graph.getEdge(root, dependent));
  }
}

Source File: ProcessConjunctions.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Checks whether a node that depends on one conjoint also depends on the other.
 * Example: in "He buys and sells electronic products", does "products" depend on both "sells" and "buys"?
 *
 * <p>Starting at the tree node of {@code elementIW}, the method climbs towards the root of {@code parse} until
 * it reaches a node for which {@code pathNodeToNode} yields a path to the pivot node, or a node whose value is
 * "ROOT". The result is whether that node dominates the tree node of {@code checkIW}.
 *
 * @param parse constituency tree of the sentence
 * @param semanticGraph dependency graph of the sentence
 * @param checkIW word whose membership in the common constituent is tested
 * @param pivotIW word that the climbed-to constituent must reach
 * @param elementIW word from which the climb starts
 */
private static boolean isDescendant(Tree parse, SemanticGraph semanticGraph, IndexedWord checkIW, IndexedWord pivotIW, IndexedWord elementIW) {
  Tree pivotNode = DpUtils.getNode(pivotIW, parse, semanticGraph);
  Tree checkNode = DpUtils.getNode(checkIW, parse, semanticGraph);
  Tree current = DpUtils.getNode(elementIW, parse, semanticGraph);

  // Climb until the current constituent reaches the pivot (or the top of the tree is hit).
  while (!current.value().equals("ROOT") && current.pathNodeToNode(current, pivotNode) == null) {
    current = current.parent(parse);
  }

  return current.dominates(checkNode);
}

Source File: ReverbPropositionGeneration.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Searches depth-first below {@code start} for a replacement root: the first dependent that is neither already
 * included nor excluded and whose part-of-speech annotation starts with "N".
 *
 * <p>Dependents contained in {@code heads} are skipped. Dependents that are already included, or that are
 * excluded by {@code ic}, are not candidates themselves but are searched recursively.
 *
 * @param included words already part of the output, keyed by an integer
 * @param ic constituent whose {@code excludedVertexes} are consulted
 * @param heads words that must not be visited
 * @param start vertex whose outgoing edges are examined
 * @param semanticGraph graph providing the edges
 * @return the first matching dependent, or {@code null} if none is found
 */
private IndexedWord getNewRoot(Map<Integer, IndexedWord> included,
    IndexedConstituent ic, Set<IndexedWord> heads, IndexedWord start, SemanticGraph semanticGraph) {
  for (SemanticGraphEdge edge : semanticGraph.getOutEdgesSorted(start)) {
    IndexedWord dependent = edge.getDependent();
    if (heads.contains(dependent)) {
      continue;
    }
    boolean alreadyHandled = included.values().contains(dependent)
        || ic.excludedVertexes.contains(dependent);
    if (alreadyHandled) {
      // Not a candidate itself, but something below it may be.
      IndexedWord nested = getNewRoot(included, ic, heads, dependent, semanticGraph);
      if (nested != null) {
        return nested;
      }
    } else if (dependent.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("N")) {
      return dependent;
    }
  }
  return null;
}

Source File: FrequencyCandidates.java From minie with GNU General Public License v3.0

6 votes

/**
 * Consumes every match of the given token sequence matcher. For each match the matched words and their root
 * are loaded into {@code sc}, the sub-constituents are generated from the left, and each resulting string is
 * added to {@code this.candidates}. {@code sc} is cleared again before the next match is processed.
 * @param tMatcher: token sequence matcher for regular expressions for sequences over tokens
 * @param sc: sub-constituent object, reused as a scratch object for every match
 */
public void generateCandidatesFromTokenRegexMatch(TokenSequenceMatcher tMatcher, SubConstituent sc){
    while (tMatcher.find()){
        // Words covered by the current match and the root computed for them
        ObjectArrayList<IndexedWord> matchedWords =
                CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(tMatcher.groupNodes());
        IndexedWord matchedRoot = CoreNLPUtils.getRootFromWordList(this.sg, matchedWords);

        sc.setRoot(matchedRoot);
        sc.setWords(matchedWords);
        sc.generateSubConstituentsFromLeft();
        for (String candidate: sc.getStringSubConstituents()){
            this.candidates.add(candidate);
        }

        // Reset the scratch object so the next match starts clean
        sc.clearSubConstituentsAndCandidates();
    }
}

Source File: ProcessConjunctions.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Compares the parts of two graphs that lie outside the descendant set of {@code root}.
 *
 * <p>Returns {@code false} when {@code root} has no descendants in the second graph, or when the number of
 * vertexes outside the descendant set differs between the graphs. Otherwise returns {@code true} only if
 * every such vertex of the first graph is also reachable from the first root of the second graph.
 * Both graphs are assumed to have a single root.
 *
 * @param semanticGraph1 first graph
 * @param semanticGraph2 second graph
 * @param root vertex whose descendants are left out of the comparison
 */
private static boolean shareAllAncestors(SemanticGraph semanticGraph1, SemanticGraph semanticGraph2, IndexedWord root) {
  Set<IndexedWord> below2 = semanticGraph2.descendants(root);
  if (below2 == null || below2.isEmpty()) {
    return false;
  }
  Set<IndexedWord> below1 = semanticGraph1.descendants(root);

  // Assumes only one root; otherwise one could delete the unused nodes and use the vertex set instead.
  Set<IndexedWord> all1 = semanticGraph1.descendants(semanticGraph1.getFirstRoot());
  Set<IndexedWord> all2 = semanticGraph2.descendants(semanticGraph2.getFirstRoot());

  int outside1 = all1.size() - below1.size();
  int outside2 = all2.size() - below2.size();
  if (outside1 != outside2) {
    return false;
  }

  for (IndexedWord vertex : all1) {
    if (!below1.contains(vertex) && !all2.contains(vertex)) {
      return false;
    }
  }
  return true;
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

6 votes

/**
 * Renders a word sequence as a space-separated string of POS tags, "[POS1] [POS2] ... [POSn]".
 * Works like "wordsToPosMergedNerSeq" except that NER types are ignored and only POS tags are written.
 * Tags recognised as adjective, adverb, noun, pronoun, verb or wh-pronoun are replaced by one common tag per
 * class (e.g. VB for all verbs, NN for all nouns); any other tag is written unchanged.
 * @param words: a list of indexed words
 * @return a string in the format "[POS1] [POS2] ... [POSn]" without leading or trailing whitespace
 */
public static String wordsToPosMergedSeq(ObjectArrayList<IndexedWord> words){
    StringBuilder merged = new StringBuilder();
    for (IndexedWord word: words){
        String tag = word.tag();
        if (isAdj(tag)) {
            merged.append(POS_TAG.JJ);
        } else if (isAdverb(tag)) {
            merged.append(POS_TAG.RB);
        } else if (isNoun(tag)) {
            merged.append(POS_TAG.NN);
        } else if (isPronoun(tag)) {
            merged.append(POS_TAG.PR);
        } else if (isVerb(tag)) {
            merged.append(POS_TAG.VB);
        } else if (isWhPronoun(tag)) {
            merged.append(POS_TAG.WP);
        } else {
            merged.append(tag);
        }
        merged.append(SEPARATOR.SPACE);
    }
    // The separator appended after the last tag is removed here
    return merged.toString().trim();
}

Source File: IndexedConstituent.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Checks whether this constituent is a prepositional phrase (i.e., starts with a preposition).
 *
 * <p>Purely heuristic: looking at the tag of the root (e.g. {@code root.tag().equals("IN")}) is not enough,
 * because the parser sometimes places a preposition inside the verbal phrase, as in "He is about to win".
 * Instead the edges above the root are inspected. The constituent counts as prepositional when an edge from
 * a parent to the root satisfies {@code DpUtils.isRel}, or when such an edge satisfies
 * {@code DpUtils.isAnyPrep} and some edge into that parent satisfies {@code DpUtils.isRcmod}.
 */
public boolean isPrepositionalPhrase() {
  for (IndexedWord parent : semanticGraph.getParentList(root)) {
    SemanticGraphEdge toRoot = semanticGraph.getEdge(parent, root);
    if (DpUtils.isRel(toRoot)) {
      return true;
    }
    if (!DpUtils.isAnyPrep(toRoot)) {
      continue;
    }
    // Prepositional edge: look one level further up for a relative-clause modifier.
    for (IndexedWord grandParent : semanticGraph.getParentList(parent)) {
      SemanticGraphEdge toParent = semanticGraph.getEdge(grandParent, parent);
      if (DpUtils.isRcmod(toParent)) {
        return true;
      }
    }
  }
  return false;
}

Source File: Clause.java From minie with GNU General Public License v3.0

6 votes

/**
 * Checks whether the constituent at position {@code index} in {@code constituents} is an adverbial that is
 * required to be output by ClausIE (e.g., adverbials indicating negation, such as "hardly").
 *
 * <p>For an indexed constituent the lemma of its root is looked up in {@code options.dictAdverbsInclude},
 * but only if the root has no children in the constituent's semantic graph; otherwise the result is
 * {@code false}. For any other constituent its root string is looked up.
 */
private boolean isIncludedAdverbial(int index, Options options) {
    Constituent constituent = constituents.get(index);
    if (!(constituent instanceof IndexedConstituent)) {
        return options.dictAdverbsInclude.contains(constituent.rootString());
    }

    IndexedConstituent indexed = (IndexedConstituent) constituent;
    IndexedWord head = indexed.getRoot();
    // An adverbial that governs other words is never treated as included
    if (indexed.getSemanticGraph().hasChildren(head)) {
        return false;
    }
    return options.dictAdverbsInclude.contains(head.lemma());
}

Source File: ClauseDetector.java From ambiverse-nlu with Apache License 2.0

6 votes

/** Creates an SVO clause from a parataxis relation. Nothing is added when the dependent of the relation has
 * no outgoing subject edge.
 * @param clausIE Receiver of the new clause (appended to its clause list)
 * @param root Head of the parataxis relation; becomes the direct object constituent
 * @param parroot  Dependent of the parataxis relation; becomes the verb constituent
 * @param roots List of clause roots; a {@code null} entry is appended for the new clause
 * @param semanticGraph Graph the constituents and the clause are built on */
private static void addParataxisClause(ClausIE clausIE, IndexedWord root, IndexedWord parroot, List<IndexedWord> roots, SemanticGraph semanticGraph) {
  Constituent verb = new IndexedConstituent(semanticGraph, parroot, Type.VERB);
  List<SemanticGraphEdge> fromVerb = semanticGraph.getOutEdgesSorted(parroot);
  SemanticGraphEdge subjectEdge = DpUtils.findFirstOfRelationOrDescendent(fromVerb, EnglishGrammaticalRelations.SUBJECT);
  if (subjectEdge == null) {
    return;
  }

  Constituent subjectConst = new IndexedConstituent(semanticGraph, subjectEdge.getDependent(), Type.SUBJECT);
  IndexedConstituent objectConst = new IndexedConstituent(semanticGraph, root, Type.DOBJ);
  // The paratactic verb must not be part of the object built around the head.
  objectConst.excludedVertexes.add(parroot);

  Clause clause = new Clause(semanticGraph);
  // Positions refer to the order in which the constituents are added below.
  clause.setSubject(0);
  clause.setVerb(1);
  clause.getDobjects().add(2);
  clause.getConstituents().add(subjectConst);
  clause.getConstituents().add(verb);
  clause.getConstituents().add(objectConst);
  clause.setType(Clause.Type.SVO);

  clausIE.clauses.add(clause);
  roots.add(null);
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

6 votes

/**
 * Given the sentence semantic graph and a list of words, build a subgraph restricted to the words in 'words'.
 * Only typed dependencies whose governor and dependent are both in the list are kept. The word with the
 * smallest index is used as the root of the new structure (an empty IndexedWord if the list is empty).
 * @param sg: sentence semantic graph
 * @param words: list of words the subgraph should be restricted to
 * @return subgraph containing the words from 'words'
 * TODO: this needs to be double checked! In some cases we have weird graphs, where there are words missing. 
 * E.g. the sentence 120 from NYT "The International ... ". Try this for getting the subgraph when the source is 
 * detected.
 */
public static SemanticGraph getSubgraphFromWords(SemanticGraph sg, ObjectArrayList<IndexedWord> words){
    // Pick the left-most word (smallest index) as the root
    IndexedWord root = new IndexedWord();
    int smallestIndex = Integer.MAX_VALUE;
    for (IndexedWord candidate: words){
        int candidateIndex = candidate.index();
        if (candidateIndex < smallestIndex){
            smallestIndex = candidateIndex;
            root = candidate;
        }
    }

    // Keep the dependencies that stay entirely inside the word list
    ObjectArrayList<TypedDependency> innerDependencies = new ObjectArrayList<TypedDependency>();
    for (TypedDependency dependency: sg.typedDependencies()){
        boolean governorInside = words.contains(dependency.gov());
        if (governorInside && words.contains(dependency.dep())) {
            innerDependencies.add(dependency);
        }
    }

    // Build the grammatical structure and convert it into a semantic graph
    TreeGraphNode rootNode = new TreeGraphNode(new CoreLabel(root));
    EnglishGrammaticalStructure structure = new EnglishGrammaticalStructure(innerDependencies, rootNode);
    return SemanticGraphFactory.generateUncollapsedDependencies(structure);
}

Source File: DpUtils.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Correspondence between nodes in Tree and SemanticGraph: finds the leaf of {@code depTree} that belongs to
 * {@code word}.
 *
 * <p>The leaves are scanned from right to left. Among the leaves whose string equals the word's text, the one
 * with the smallest non-negative distance between its leaf position and the word's position in the sorted
 * vertex list is returned. The scan stops at the first matching leaf that lies to the left of that position.
 *
 * @param word the graph vertex to locate
 * @param depTree tree whose leaves are searched
 * @param semanticGraph graph providing the sorted vertex list
 * @return the matching leaf, or {@code null} if no leaf qualifies
 */
public static Tree getNode(IndexedWord word, Tree depTree, SemanticGraph semanticGraph) {
  int graphPosition = semanticGraph.vertexListSorted().indexOf(word);
  List<Tree> leaves = depTree.getLeaves();
  Tree best = null;
  int bestOffset = Integer.MAX_VALUE;
  for (int i = leaves.size() - 1; i >= 0; i--) {
    Tree leaf = leaves.get(i);
    if (!leaf.toString().equals(word.word())) {
      continue;
    }
    int offset = i - graphPosition;
    if (offset < 0) {
      break;
    }
    if (offset < bestOffset) {
      best = leaf;
      bestOffset = offset;
    }
  }
  return best;
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

6 votes

/**
 * Given a sequence of words and a pivot-word index, return the words chained to the pivot by the same NER type,
 * both to the left and to the right of the pivot word (it is assumed that the pivot word is also NER).
 * The indices collected by the left and right helpers are merged with the pivot index, passed through
 * FastUtil.sort, and mapped back to the words of the sequence.
 * @param sequence: a sequence of words (list of IndexedWord)
 * @param wordInd: the index of the pivot word
 * @return a list of the chained words to the left and the right of the pivot word (the pivot word is included)
 */
public static ObjectArrayList<IndexedWord> getChainedNERs(ObjectArrayList<IndexedWord> sequence, int wordInd){
    String pivotNer = sequence.get(wordInd).ner();
    IntArrayList chainIndices = new IntArrayList();

    // Each helper gets its own (empty) copy to fill
    IntArrayList leftIndices = getChainedNERsFromLeft(sequence, chainIndices.clone(), wordInd, pivotNer);
    IntArrayList rightIndices = getChainedNERsFromRight(sequence, chainIndices.clone(), wordInd, pivotNer);

    // Left part, pivot, right part
    chainIndices.addAll(leftIndices);
    chainIndices.add(wordInd);
    chainIndices.addAll(rightIndices);

    // Translate the indices back into words
    ObjectArrayList<IndexedWord> chainedWords = new ObjectArrayList<IndexedWord>();
    for (int position: FastUtil.sort(chainIndices)){
        chainedWords.add(sequence.get(position));
    }
    return chainedWords;
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * For every word in the given set, looks up the edge that connects the word's parent (as returned by
 * {@code semanticGraph.getParent}) to the word, and collects these edges in iteration order of the set.
 * The lookup result is added as-is, one entry per word.
 * @param semanticGraph: graph in which parents and edges are looked up
 * @param wordList: the words whose parent edges are requested
 * @return list with one parent-edge entry per word
 */
public static ObjectArrayList<SemanticGraphEdge> listOfIndexedWordsToParentEdges(SemanticGraph semanticGraph, ObjectOpenHashSet<IndexedWord> wordList) {
    ObjectArrayList<SemanticGraphEdge> parentEdges = new ObjectArrayList<>();
    for (IndexedWord child: wordList) {
        IndexedWord parent = semanticGraph.getParent(child);
        parentEdges.add(semanticGraph.getEdge(parent, child));
    }
    return parentEdges;
}

Source File: ClauseDetector.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Looks for the given node, or one of the nodes above it in the graph, inside {@code ancestors}.
 * The node itself is tested first, then its governors are searched recursively in the order of the sorted
 * incoming edges.
 *
 * <p>NOTE(review): a node without incoming edges yields -1 even when it is contained in {@code ancestors},
 * because the membership test is only reached when at least one incoming edge exists - confirm this is
 * intended.
 *
 * @param semanticGraph graph providing the incoming edges
 * @param node node at which the upward search starts
 * @param ancestors candidate nodes
 * @return index within {@code ancestors} of the first hit, or -1 if there is none
 */
private static int ancestorOf(SemanticGraph semanticGraph, IndexedWord node, List<IndexedWord> ancestors) {
  List<SemanticGraphEdge> incoming = semanticGraph.getIncomingEdgesSorted(node);
  if (incoming.isEmpty()) {
    return -1;
  }
  int ownIndex = ancestors.indexOf(node);
  if (ownIndex >= 0) {
    return ownIndex;
  }
  for (SemanticGraphEdge edge : incoming) {
    int viaGovernor = ancestorOf(semanticGraph, edge.getGovernor(), ancestors);
    if (viaGovernor >= 0) {
      return viaGovernor;
    }
  }
  return -1;
}

Source File: ClauseDetector.java From ambiverse-nlu with Apache License 2.0

5 votes

/** Creates a constituent for a possessive relative clause. Works on a copy of the given graph: a possession
 * modifier edge from the governor of {@code poss} to the governor of {@code rcmod} is added (carrying the
 * weight of {@code poss}), the vertexes to exclude are computed on that state, and the two original edges
 * are then removed from the copy.
 * @param semanticGraph The semantic graph (left untouched, only copied)
 * @param poss The edge referring to the possessive relation
 * @param rcmod The relative clause modifier of the relation
 * @param constGovernor The root of the constituent
 * @param type The type of the constituent*/
private static Constituent createPossConstituent(SemanticGraph semanticGraph, SemanticGraphEdge poss, SemanticGraphEdge rcmod,
    IndexedWord constGovernor, Type type) {
  SemanticGraph rewired = new SemanticGraph(semanticGraph);
  IndexedWord clauseHead = rcmod.getGovernor();

  rewired.addEdge(poss.getGovernor(), clauseHead, EnglishGrammaticalRelations.POSSESSION_MODIFIER, poss.getWeight(), false);
  // Exclusions are computed while the new edge is present and the old edges are still in place.
  Set<IndexedWord> excluded = DpUtils.exclude(rewired, EXCLUDE_RELATIONS_COMPLEMENT, clauseHead);
  rewired.removeEdge(poss);
  rewired.removeEdge(rcmod);

  return new IndexedConstituent(rewired, constGovernor, Collections.<IndexedWord>emptySet(), excluded, type);
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Given a sequence of indexed words and a start position, collect the indices of all verbs that directly
 * precede that position without interruption, walking leftwards until a non-verb or the start of the
 * sequence is reached.
 * @param sequence: a list of words
 * @param chainedVerbs: list the found indices are appended to (nearest index first)
 * @param wordInd: the word index from where the search starts 
 * @return the list 'chainedVerbs', extended by the indices of the verbs preceding the word at 'wordInd'
 */
private static IntArrayList getChainedVerbsFromLeft(ObjectArrayList<IndexedWord> sequence, 
        IntArrayList chainedVerbs, int wordInd){
    // Step left while the neighbour exists and is a verb
    int previous = wordInd - 1;
    while (previous >= 0 && isVerb(sequence.get(previous).tag())){
        chainedVerbs.add(previous);
        previous--;
    }
    return chainedVerbs;
}

Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0

5 votes

/**
 * Generate some extractions from TokenRegex patterns. The pattern REGEX.T_ORG_IN_LOC is matched against the
 * sentence; for every match the ORGANIZATION words become the subject, the LOCATION words the object, and
 * words without NER type that are tagged as preposition are added to the relation (which is first reset via
 * setIsARelation). The resulting triple is stored in 'propositions'.
 **/
public void generateSequentialPatternExtractions() {
    this.tPattern = TokenSequencePattern.compile(REGEX.T_ORG_IN_LOC);
    this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));
    while (this.tMatcher.find()){
        this.setIsARelation();

        // Distribute the matched words over subject / object / relation
        for (IndexedWord w: CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) {
            String ner = w.ner();
            if (ner.equals(NE_TYPE.ORGANIZATION)) {
                this.subj.addWordToList(w);
            } else if (ner.equals(NE_TYPE.LOCATION)) {
                this.obj.addWordToList(w);
            } else if (ner.equals(NE_TYPE.NO_NER) && w.tag().equals(POS_TAG.IN)) {
                this.rel.addWordToList(w);
            }
        }

        IndexedWord subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        IndexedWord objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());

        // The word lists are cloned because subj/rel/obj are reused for the next match
        ObjectArrayList<AnnotatedPhrase> triple = new ObjectArrayList<>();
        triple.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        triple.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
        triple.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
        this.propositions.add(new AnnotatedProposition(triple, new Attribution()));

        // Reset the shared phrases
        this.subj.clear();
        this.rel.clear();
        this.obj.clear();
    }
}

Source File: PropositionGenerator.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Generates a textual representation of a given constituent in a given clause and stores it, together with
 * its tokens and head, in {@code proposition} at {@code factPosition}.
 *
 * <p>A text constituent is wrapped in a fresh {@code IndexedWord} carrying its text. For an indexed
 * constituent a semantic graph is created from the clause, the excluded edges are removed, and the words are
 * the descendants of the constituent root plus the descendants of its additional vertexes; the root itself is
 * dropped when the constituent is a prepositional phrase.
 *
 * @param clause clause that owns the constituent
 * @param constituentIndex position of the constituent within the clause
 * @param factPosition position within the proposition that is filled
 * @param excludeRelations relations passed to {@code DpUtils.removeEdges}
 * @param excludeRelationsTop top-level relations passed to {@code DpUtils.removeEdges}
 * @param proposition proposition being filled
 * @throws IllegalArgumentException if the constituent is neither a text nor an indexed constituent
 */
private void generate(Clause clause, int constituentIndex, int factPosition, Collection<GrammaticalRelation> excludeRelations,
    Collection<GrammaticalRelation> excludeRelationsTop, Proposition proposition) {
  Constituent constituent = clause.getConstituents().get(constituentIndex);

  if (constituent instanceof TextConstituent) {
    String text = ((TextConstituent) constituent).text();
    IndexedWord textWord = new IndexedWord();
    textWord.setWord(text);
    textWord.setValue(text);
    proposition.constituents.put(factPosition, text);
    proposition.addTokens(factPosition, textWord);
    proposition.addHead(factPosition, textWord);
    return;
  }

  if (!(constituent instanceof IndexedConstituent)) {
    throw new IllegalArgumentException();
  }

  IndexedConstituent iconstituent = (IndexedConstituent) constituent;
  proposition.addHead(factPosition, iconstituent.getRoot());

  SemanticGraph pruned = clause.createSemanticGraph(false);
  DpUtils.removeEdges(pruned, iconstituent.getRoot(), excludeRelations, excludeRelationsTop);

  Set<IndexedWord> words = new TreeSet<IndexedWord>(pruned.descendants(iconstituent.getRoot()));
  for (IndexedWord extra : iconstituent.getAdditionalVertexes()) {
    words.addAll(pruned.descendants(extra));
  }
  if (iconstituent.isPrepositionalPhrase()) {
    words.remove(iconstituent.getRoot());
  }

  proposition.constituents.put(factPosition, generatePhrase(iconstituent, words));
  proposition.addTokens(factPosition, words);
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Converts a list of core maps into a list of indexed words. Each core map is wrapped in a CoreLabel, which
 * in turn is wrapped in an IndexedWord; the order of the input list is kept.
 * @param coreMapList: the core maps to convert
 * @return list with one indexed word per core map
 */
public static ObjectArrayList<IndexedWord> getWordListFromCoreMapList(List<CoreMap> coreMapList){
    ObjectArrayList<IndexedWord> words = new ObjectArrayList<>();
    for (CoreMap coreMap: coreMapList){
        CoreLabel label = new CoreLabel(coreMap);
        words.add(new IndexedWord(label));
    }
    return words;
}

Source File: IndexedConstituent.java From minie with GNU General Public License v3.0

5 votes

/**
 * Checks whether this constituent is a prepositional phrase (i.e., starts with a preposition).
 *
 * Purely heuristic: checking the tag of the root (e.g. root.tag().equals("IN")) is not enough, because the
 * parser sometimes includes a preposition in the verbal phrase, as in "He is about to win". Instead the
 * edges above the root are inspected. The edge from a parent to the root is looked up in the constituent's
 * own graph; the edges above that parent are looked up in the sentence graph passed as argument.
 *
 * @param sentSemanticGraph semantic graph of the whole sentence
 * @return true if an edge into the root satisfies DpUtils.isRel, or satisfies DpUtils.isAnyPrep while an
 *         edge into the corresponding parent satisfies DpUtils.isRcmod; false otherwise
 */
public boolean isPrepositionalPhrase(SemanticGraph sentSemanticGraph) {
	for (IndexedWord parent : semanticGraph.getParentList(root)) {
		SemanticGraphEdge toRoot = semanticGraph.getEdge(parent, root);
		if (DpUtils.isRel(toRoot)) {
			return true;
		}
		if (!DpUtils.isAnyPrep(toRoot)) {
			continue;
		}
		// Prepositional edge: check in the sentence graph whether the parent hangs off a relative clause modifier
		for (IndexedWord grandParent : sentSemanticGraph.getParentList(parent)) {
			SemanticGraphEdge toParent = sentSemanticGraph.getEdge(grandParent, parent);
			if (DpUtils.isRcmod(toParent)) {
				return true;
			}
		}
	}
	return false;
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Given a sequence of indexed words and a start position, collect the indices of all verbs that directly
 * follow that position without interruption, walking rightwards until a non-verb or the end of the
 * sequence is reached.
 * @param sequence: a list of words
 * @param chainedVerbs: list the found indices are appended to (nearest index first)
 * @param wordInd: the word index from where the search starts 
 * @return the list 'chainedVerbs', extended by the indices of the verbs following the word at 'wordInd'
 */
private static IntArrayList getChainedVerbsFromRight(ObjectArrayList<IndexedWord> sequence, 
        IntArrayList chainedVerbs, int wordInd){
    // Step right while the neighbour exists and is a verb
    int next = wordInd + 1;
    while (next < sequence.size() && isVerb(sequence.get(next).tag())){
        chainedVerbs.add(next);
        next++;
    }
    return chainedVerbs;
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Given a sequence of indexed words and a start position, collect the indices of all nouns that directly
 * precede that position without interruption, walking leftwards until a non-noun or the start of the
 * sequence is reached.
 * @param sequence: a list of words
 * @param chainedNouns: list the found indices are appended to (nearest index first)
 * @param wordInd: the word index from where the search starts 
 * @return the list 'chainedNouns', extended by the indices of the nouns preceding the word at 'wordInd'
 */
private static IntArrayList getChainedNounsFromLeft(ObjectArrayList<IndexedWord> sequence, 
        IntArrayList chainedNouns, int wordInd){
    // Step left while the neighbour exists and is a noun
    int previous = wordInd - 1;
    while (previous >= 0 && isNoun(sequence.get(previous).tag())){
        chainedNouns.add(previous);
        previous--;
    }
    return chainedNouns;
}

Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0

5 votes

/**
 * If "city|town of LOCATION" => "LOCATION" "is" "city|town".
 * The pattern REGEX.T_CITY_OF_LOC is matched against the sentence. In each match, words tagged as
 * preposition are dropped, LOCATION words form the subject and all remaining words form the object. The
 * relation is set once before matching starts and cleared once all matches are processed.
 **/
public void extractCityOfLocation() {
    // The "is-a" relation is shared by all matches
    this.setIsARelation();

    this.tPattern = TokenSequencePattern.compile(REGEX.T_CITY_OF_LOC);
    this.tMatcher = tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));
    while (this.tMatcher.find()){
        ObjectArrayList<IndexedWord> matched = CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes());
        for (IndexedWord w: matched) {
            boolean isLocation = w.ner().equals(NE_TYPE.LOCATION);
            boolean isPreposition = w.tag().equals(POS_TAG.IN);
            if (isPreposition) {
                continue;
            }
            if (isLocation) {
                this.subj.addWordToList(w);
            } else {
                this.obj.addWordToList(w);
            }
        }

        // Assemble subj/rel/obj into a proposition; the lists are cloned because subj and obj are reused
        IndexedWord subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        IndexedWord objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());
        ObjectArrayList<AnnotatedPhrase> triple = new ObjectArrayList<>();
        triple.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        triple.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
        triple.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
        this.propositions.add(new AnnotatedProposition(triple, new Attribution()));

        // Reset subject and object for the next match
        this.subj.clear();
        this.obj.clear();
    }

    // Clear the relation
    this.rel.clear();
}

Source File: MinIE.java From minie with GNU General Public License v3.0

5 votes

/**
 * Given an annotated proposition, check if it contains a clause modifier as an object. The object (phrase at
 * position 2) qualifies when it contains a verb and at least one of its words is attached to its parent in
 * the sentence graph by a SUBJECT, NOMINAL_SUBJECT, CLAUSAL_SUBJECT or NOMINAL_PASSIVE_SUBJECT relation.
 * (An earlier variant that ran the same relation test on the phrase at position 1 is no longer used.)
 * @param proposition: annotated proposition
 * @return: 'true' if the object is a clause modifier; 'false' otherwise
 */
public boolean detectClauseModifier(ObjectArrayList<AnnotatedPhrase> proposition){
    // Without a verb in the object there is nothing to check
    if (!CoreNLPUtils.verbInList(proposition.get(2).getWordList())){
        return false;
    }

    for (IndexedWord word: proposition.get(2).getWordList()){
        IndexedWord parent = this.sentenceSemGraph.getParent(word);
        if (parent == null){
            continue;
        }
        SemanticGraphEdge edge = this.sentenceSemGraph.getEdge(parent, word);
        boolean isSubjectRelation = (edge.getRelation() == EnglishGrammaticalRelations.SUBJECT)
                || (edge.getRelation() == EnglishGrammaticalRelations.NOMINAL_SUBJECT)
                || (edge.getRelation() == EnglishGrammaticalRelations.CLAUSAL_SUBJECT)
                || (edge.getRelation() == EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT);
        if (isSubjectRelation){
            return true;
        }
    }
    return false;
}

Source File: DpUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Finds the first occurrence of a grammatical relation or its descendants for a relative pronoun.
 *
 * The outgoing edges below {@code root} are searched depth-first in sorted order. An edge matches when the
 * tag of its dependent starts with 'W' and {@code rel.isAncestor} accepts the relation of the edge. When the
 * subtree below one edge contains no match, the search continues with the next sibling edge (previously the
 * method gave up after the first edge). Each vertex is expanded at most once, so cyclic graphs terminate.
 *
 * @param semanticGraph graph to search
 * @param root vertex below which the search starts
 * @param rel relation the matching edge must have, directly or as a descendant relation
 * @return the first matching edge, or {@code null} if there is none
 */
public static SemanticGraphEdge findDescendantRelativeRelation(SemanticGraph semanticGraph, IndexedWord root, 
        GrammaticalRelation rel) {
    return searchRelativeRelation(semanticGraph, root, rel, new java.util.HashSet<IndexedWord>());
}

/** Depth-first worker for findDescendantRelativeRelation; 'visited' holds the vertexes already expanded. */
private static SemanticGraphEdge searchRelativeRelation(SemanticGraph semanticGraph, IndexedWord root,
        GrammaticalRelation rel, java.util.Set<IndexedWord> visited) {
    // A vertex reached a second time (e.g. through a cycle) cannot contribute anything new
    if (!visited.add(root)) {
        return null;
    }
    for (SemanticGraphEdge e : semanticGraph.getOutEdgesSorted(root)) {
        String tag = e.getDependent().tag();
        boolean whTagged = tag != null && !tag.isEmpty() && tag.charAt(0) == 'W';
        if (whTagged && rel.isAncestor(e.getRelation())) {
            return e;
        }
        SemanticGraphEdge nested = searchRelativeRelation(semanticGraph, e.getDependent(), rel, visited);
        if (nested != null) {
            return nested;
        }
    }
    return null;
}

Source File: ProcessConjunctions.java From minie with GNU General Public License v3.0

5 votes

/**
 * Checks if two conjoint verbs share all dependents. Subject edges and the edge leading to the conjoint
 * itself are ignored; every other dependent must pass the isDescendant test against the conjoint and root.
 * The 'rel' parameter is not used by this method.
 */
private static boolean shareAll(List<SemanticGraphEdge> outedges, IndexedWord root, IndexedWord conj, 
        SemanticGraph semGraph, GrammaticalRelation rel) {
    for (SemanticGraphEdge edge : outedges) {
        boolean ignored = DpUtils.isAnySubj(edge) || edge.getDependent().equals(conj);
        if (!ignored && !isDescendant(conj, root, edge.getDependent(), semGraph)) {
            return false;
        }
    }
    return true;
}

Source File: Edges.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Get the full text of the main mention of a particular word, if it has a
 * better mention. Otherwise just get its segment of the tree using
 * concatNoun().
 * 
 * @param phrase phrase whose "unpronoun" table is consulted, keyed by word index
 * @param graph graph handed to concatNoun() when no linked mention exists
 * @param word the word to resolve
 * @return the mention span of the second element of the linked pair, or the
 *         result of concatNoun() when the word has no entry
 */
public static String getMainMention(
		Phrase phrase, SemanticGraph graph, IndexedWord word) {
	Pair<CorefMention, CorefMention> mentionPair =
			phrase.getUnpronoun().get(word.index());
	return (mentionPair != null)
			? mentionPair.second.mentionSpan
			: Trees.concatNoun(graph, word);
}

Source File: DpUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * The dependency parse might contain some special tokens (or whole words) which we don't want. 
 * Filter those out: punctuation marks, quote tokens and bracket placeholders are rejected. A single
 * apostrophe is rejected only when it is not tagged as "POS".
 * @param word: the token to test
 * @return boolean: if true, the token needs to be filtered, false -> otherwise
 */
public static boolean filterTokens(IndexedWord word){
    switch (word.word()) {
        case ".":
        case ",":
        case "-RRB-":
        case "-LRB-":
        case "\"":
        case "\'\'":
        case "``":
        case ";":
        case ":":
        case "-":
        case "!":
        case "--":
        case "`":
        case "?":
        case "-RCB-":
        case "-LCB-":
            return true;
        case "'":
            // An apostrophe tagged "POS" is kept
            return !word.tag().equals("POS");
        default:
            return false;
    }
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Renders a word sequence as a space-separated string in the format "[POS1|NER1] [POS2|NER2] ... [POSn|NERn]".
 * A word whose NER type differs from NE_TYPE.NO_NER contributes its NER type; any other word contributes its
 * POS tag. Tags recognised as adjective, adverb, noun, pronoun, verb or wh-pronoun are replaced by one common
 * tag per class (e.g. VB for all verbs, NN for all nouns); any other tag is written unchanged.
 * @param words: a list of indexed words
 * @return a string in the format "[POS1|NER1] [POS2|NER2] ... [POSn|NERn]" without leading or trailing whitespace
 */
public static String wordsToPosMergedNerSeq(ObjectArrayList<IndexedWord> words){
    StringBuilder merged = new StringBuilder();
    for (IndexedWord word: words){
        if (!word.ner().equals(NE_TYPE.NO_NER)){
            // Named entity: the NER type replaces the POS tag
            merged.append(word.ner());
        } else {
            String tag = word.tag();
            if (isAdj(tag)) {
                merged.append(POS_TAG.JJ);
            } else if (isAdverb(tag)) {
                merged.append(POS_TAG.RB);
            } else if (isNoun(tag)) {
                merged.append(POS_TAG.NN);
            } else if (isPronoun(tag)) {
                merged.append(POS_TAG.PR);
            } else if (isVerb(tag)) {
                merged.append(POS_TAG.VB);
            } else if (isWhPronoun(tag)) {
                merged.append(POS_TAG.WP);
            } else {
                merged.append(tag);
            }
        }
        merged.append(SEPARATOR.SPACE);
    }
    // The separator appended after the last entry is removed here
    return merged.toString().trim();
}

Source File: CoreNLPUtils.java From minie with GNU General Public License v3.0

5 votes

/**
 * Converts a list of core maps into a set of indexed words. Each core map is wrapped in a CoreLabel, which
 * in turn is wrapped in an IndexedWord before being added to the set.
 * @param coreMapList: the core maps to convert
 * @return hash set holding the indexed words created from the core maps
 */
public static ObjectOpenHashSet<IndexedWord> getWordSetFromCoreMapList(List<CoreMap> coreMapList){
    ObjectOpenHashSet<IndexedWord> words = new ObjectOpenHashSet<>();
    for (CoreMap coreMap: coreMapList){
        CoreLabel label = new CoreLabel(coreMap);
        words.add(new IndexedWord(label));
    }
    return words;
}

edu.stanford.nlp.ling.IndexedWord Java Examples