edu.stanford.nlp.ling.CoreAnnotations Java Examples

Source File: ConstituentExample.java From blog-codes with Apache License 2.0

7 votes

/**
 * Parses one hard-coded sentence with a Stanford CoreNLP pipeline, prints the
 * resulting tree to standard output, and writes every constituent labelled
 * "VP" or "NP" (plus the leaves it spans) to standard error.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
	// Pipeline configuration; the annotator chain ends with the parser.
	Properties config = new Properties();
	config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
	// Optional: the faster shift-reduce parser can be selected by setting
	// "parse.model" to "edu/stanford/nlp/models/srparser/englishSR.ser.gz".
	config.setProperty("parse.maxlen", "100");
	StanfordCoreNLP nlp = new StanfordCoreNLP(config);

	Annotation doc = new Annotation("The small red car turned very quickly around the corner.");
	nlp.annotate(doc);

	// Only the first sentence's tree is read.
	Tree parse = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0)
			.get(TreeCoreAnnotations.TreeAnnotation.class);
	System.out.println(parse);

	for (Constituent span : parse.constituents(new LabeledScoredConstituentFactory())) {
		if (span.label() == null) {
			continue;
		}
		String category = span.label().toString();
		if (!category.equals("VP") && !category.equals("NP")) {
			continue;
		}
		System.err.println("found constituent: " + span.toString());
		// end() is treated as inclusive here, hence the + 1 for subList's exclusive bound.
		System.err.println(parse.getLeaves().subList(span.start(), span.end() + 1));
	}
}

Source File: InteractiveDriver.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Interactive console loop: every line typed at the "sentence> " prompt is run
 * through a KBP-enabled CoreNLP pipeline, and each extracted triple whose
 * relation gloss is one of the relations of interest is printed to standard error.
 *
 * @param args command-line arguments, converted to pipeline properties
 * @throws IOException declared for the console loop
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

    // Only triples carrying one of these relation glosses are reported.
    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees")
            .collect(Collectors.toSet());

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).stream()
                    .filter(triple -> interested.contains(triple.relationGloss()))
                    .forEach(triple -> System.err.println(triple));
        }
    });
}

Source File: SentimentAnalyzer.java From blog-codes with Apache License 2.0

6 votes

/**
 * Runs the sentiment pipeline over the text and packages the outcome.
 *
 * <p>The result is overwritten for every sentence, so for multi-sentence text
 * the returned values are those of the last sentence processed.
 *
 * @param text text to analyse; may be null or empty
 * @return a result with type, score and per-class percentages filled in, or an
 *         unpopulated result when the text is null or empty
 */
public SentimentResult getSentimentResult(String text) {
	SentimentClassification scores = new SentimentClassification();
	SentimentResult result = new SentimentResult();
	if (text == null || text.length() == 0) {
		return result;
	}

	Annotation annotated = pipeline.process(text);
	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
		SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(sentimentTree);

		// Entries 0..4 are scaled by 100 and rounded to whole numbers.
		scores.setVeryNegative((double) Math.round(predictions.get(0) * 100d));
		scores.setNegative((double) Math.round(predictions.get(1) * 100d));
		scores.setNeutral((double) Math.round(predictions.get(2) * 100d));
		scores.setPositive((double) Math.round(predictions.get(3) * 100d));
		scores.setVeryPositive((double) Math.round(predictions.get(4) * 100d));

		String sentimentType = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
		result.setSentimentType(sentimentType);
		result.setSentimentClass(scores);
		result.setSentimentScore(RNNCoreAnnotations.getPredictedClass(sentimentTree));
	}
	return result;
}

Source File: Tokens.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Builds an AIDA {@code Tokens} collection from the sentences and tokens
 * indexed in the given CAS.
 *
 * @param jCas the CAS holding DKPro sentence and token annotations
 * @return the tokens in iteration order, each carrying its running token id,
 *         character offsets, POS tag and sentence number
 */
public static Tokens getTokensFromJCas(JCas jCas) {
  Tokens result = new Tokens();
  // Sentence and token ids are counted manually while iterating
  // (original author's note: DKPro does not give a sentence index).
  int sentenceId = 0;
  int tokenId = 0;
  for (Sentence sentence : select(jCas, Sentence.class)) {
    for (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token dkToken : selectCovered(jCas,
        de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.class, sentence)) {
      // Original author's note: this conversion step should be avoided by transforming
      // directly from DKPro to the AIDA token; the obstacle is the POS mapping, since
      // AIDA works with Stanford tags.
      CoreLabel tagged = CoreNlpUtils.tokenToWord(dkToken);
      Token aidaToken = new Token(tokenId, dkToken.getCoveredText(), dkToken.getBegin(), dkToken.getEnd(), 0);
      aidaToken.setPOS(tagged.get(CoreAnnotations.PartOfSpeechAnnotation.class));
      aidaToken.setSentence(sentenceId);
      result.addToken(aidaToken);
      tokenId++;
    }
    sentenceId++;
  }
  return result;
}

Source File: StanfordRNNDParser.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Converts the CAS to a CoreNLP annotation, dependency-parses every sentence,
 * stores the converted graph as the sentence's enhanced dependencies, and
 * finally writes the dependencies back into the CAS.
 *
 * <p>The graph and its sorted edges are also printed to standard output.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override public void process(JCas jCas) throws AnalysisEngineProcessException {
  mappingProvider.configure(jCas.getCas());

  Annotation document = new DKPro2CoreNlp().convert(jCas, new Annotation());
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    GrammaticalStructure structure = parser.predict(sentence);
    SemanticGraph parsedGraph = SemanticGraphFactory.makeFromTree(structure, SemanticGraphFactory.Mode.CCPROCESSED, GrammaticalStructure.Extras.MAXIMAL, null);
    parsedGraph.prettyPrint();

    // The converted graph, not the raw parse, is what gets attached to the sentence.
    SemanticGraph convertedGraph = semanticGraphUniversalEnglishToEnglish(parsedGraph);
    sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, convertedGraph);
    for (SemanticGraphEdge edge : convertedGraph.edgeListSorted()) {
      System.out.println(edge);
    }
  }

  convertDependencies(jCas, document, true);
}

Source File: StanfordTokenizer.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Tokenizes and sentence-splits the document text with the CoreNLP pipeline
 * registered for the document's language, adding one Sentence and one Token
 * annotation per detected span to the CAS indexes.
 *
 * @param aJCas the CAS whose document text is processed
 * @throws AnalysisEngineProcessException if the document language has no entry
 *         in the language map
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  Annotation document = new Annotation(aJCas.getDocumentText());

  String language = aJCas.getDocumentLanguage();
  if (!languageMap.containsKey(language)) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  // The language map yields the index of the pipeline to use.
  StanfordCoreNLP pipelineForLanguage = stanfordCoreNLPs[languageMap.get(language)];
  pipelineForLanguage.annotate(document);

  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    int sentenceBegin = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int sentenceEnd = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    new Sentence(aJCas, sentenceBegin, sentenceEnd).addToIndexes();

    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      new Token(aJCas,
          token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
          token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)).addToIndexes();
    }
  }
}

Source File: ReverbPropositionGeneration.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Depth-first search along the sorted outgoing edges of {@code start} for a
 * replacement root word.
 *
 * <p>Dependents in {@code heads} are skipped. Dependents that are already
 * included, or excluded by the constituent, are searched recursively. Any other
 * dependent is returned if its POS tag starts with "N".
 *
 * @param included words already included, keyed by integer
 * @param ic constituent whose excluded vertexes are searched through rather than returned
 * @param heads words that must not be visited
 * @param start word whose outgoing edges are explored
 * @param semanticGraph graph supplying the edges
 * @return the first matching word found, or null if there is none
 */
private IndexedWord getNewRoot(Map<Integer, IndexedWord> included,
    IndexedConstituent ic, Set<IndexedWord> heads, IndexedWord start, SemanticGraph semanticGraph) {
  for (SemanticGraphEdge edge : semanticGraph.getOutEdgesSorted(start)) {
    IndexedWord dependent = edge.getDependent();
    if (heads.contains(dependent)) {
      continue;
    }

    boolean alreadyIncluded = included.values().contains(dependent);
    if (alreadyIncluded || ic.excludedVertexes.contains(dependent)) {
      // Not a candidate itself; look for one further down.
      IndexedWord candidate = getNewRoot(included, ic, heads, dependent, semanticGraph);
      if (candidate != null) {
        return candidate;
      }
    } else if (dependent.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("N")) {
      // Reaching this branch already implies the dependent is not included.
      return dependent;
    }
  }
  return null;
}

Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Classifies the concatenated sample sentences with a Stanford CRF classifier
 * and prints every token whose answer label is not "O" as {@code word:category}.
 */
private static void usingStanfordNER() {
    String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz";
    CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model);

    // Concatenate with a StringBuilder instead of repeated String += in the loop,
    // which re-copies the accumulated text on every iteration.
    StringBuilder text = new StringBuilder();
    for (String element : sentences) {
        text.append(element);
    }

    List<List<CoreLabel>> entityList = classifier.classify(text.toString());

    for (List<CoreLabel> internalList : entityList) {
        for (CoreLabel coreLabel : internalList) {
            String word = coreLabel.word();
            String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class);
            // Tokens labelled "O" are skipped.
            if (!"O".equals(category)) {
                System.out.println(word + ":" + category);
            }
        }
    }
}

Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Classifies the concatenated sample sentences with a Stanford CRF classifier
 * and prints every token whose answer label is not "O" as {@code word:category}.
 */
private static void usingStanfordNER() {
    String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz";
    CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model);

    // Concatenate with a StringBuilder instead of repeated String += in the loop,
    // which re-copies the accumulated text on every iteration.
    StringBuilder text = new StringBuilder();
    for (String element : sentences) {
        text.append(element);
    }

    List<List<CoreLabel>> entityList = classifier.classify(text.toString());

    for (List<CoreLabel> internalList : entityList) {
        for (CoreLabel coreLabel : internalList) {
            String word = coreLabel.word();
            String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class);
            // Tokens labelled "O" are skipped.
            if (!"O".equals(category)) {
                System.out.println(word + ":" + category);
            }
        }
    }
}

Source File: ComparisonUtils.java From NLIWOD with GNU Affero General Public License v3.0

6 votes

/**
 * Collects the tokens of the question whose part-of-speech tag starts with the
 * given tag.
 *
 * @param question String to retrieve words from.
 * @param tag POS tag prefix to match, e.g. JJR for comparatives and JJS for superlatives.
 * @return List of the retrieved words, or null if either argument is null.
 */
private ArrayList<String> getWords(String question, String tag) {
	if (question == null || tag == null) {
		return null;
	}

	Annotation annotated = new Annotation(question);
	PIPELINE.annotate(annotated);

	ArrayList<String> matches = new ArrayList<String>();
	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
			if (!token.tag().startsWith(tag)) {
				continue;
			}
			// Keep the token's string form up to its last '-'
			// (presumably a "word-index" rendering; verify against CoreLabel.toString()).
			String rendered = token.toString();
			matches.add(rendered.substring(0, rendered.lastIndexOf("-")));
		}
	}
	return matches;
}

Source File: CorefTool.java From Criteria2Query with Apache License 2.0

6 votes

/**
 * Runs a coreference pipeline over a fixed sample text and prints the
 * coreference chains, followed by the mentions of each sentence, to standard
 * output.
 */
public void extractCoref() {
	String s="Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
	Annotation document = new Annotation(s);

	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	pipeline.annotate(document);

	// Document-level output: one line per coreference chain.
	System.out.println("---");
	System.out.println("coref chains");
	document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()
			.forEach(chain -> System.out.println("\t" + chain));

	// Sentence-level output: one section per sentence, one line per mention.
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)
				.forEach(mention -> System.out.println("\t" + mention));
	}
}

Source File: ParserAnnotatorUtils.java From Heracles with GNU General Public License v3.0

6 votes

/**
 * Fills in the tag of every sentence token that has none, using the tag found
 * at the same position in the tree's tagged yield. The tree leaf at that
 * position receives the same tag when it implements {@code HasTag}.
 *
 * @param sentence sentence whose tokens may be missing tags
 * @param tree tree supplying the tags and the leaves
 */
public static void setMissingTags(CoreMap sentence, Tree tree) {
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  // Both yields are computed lazily, only once an untagged token is seen.
  List<TaggedWord> treeTags = null;
  List<Label> treeLeaves = null;

  for (int position = 0; position < tokens.size(); position++) {
    CoreLabel token = tokens.get(position);
    if (token.tag() != null) {
      continue;
    }
    if (treeTags == null) {
      treeTags = tree.taggedYield();
      treeLeaves = tree.yield();
    }

    String tag = treeTags.get(position).tag();
    token.setTag(tag);

    Label leaf = treeLeaves.get(position);
    if (leaf instanceof HasTag) {
      ((HasTag) leaf).setTag(tag);
    }
  }
}

Source File: StanfordCoref.java From Graphene with GNU General Public License v3.0

6 votes

/**
 * Annotates the text, rebuilds it sentence by sentence from its tokens, and
 * passes every coreference mention through {@code getReplacement} together
 * with the representative mention of its chain.
 *
 * @param text the text to resolve
 * @return the original text paired with the rewritten sentences joined by single spaces
 */
@Override
public CoreferenceContent doCoreferenceResolution(String text) {
	Annotation document = new Annotation(text);
	PIPELINE.annotate(document);

	// Rebuild each sentence as a mutable word sequence.
	List<Sentence> sentences = new ArrayList<>();
	for (CoreMap sentenceMap : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		Sentence rebuilt = new Sentence();
		for (CoreLabel token : sentenceMap.get(CoreAnnotations.TokensAnnotation.class)) {
			rebuilt.addWord(token.word());
		}
		sentences.add(rebuilt);
	}

	// Rewrite each mention in place, using the representative mention of its chain.
	for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		String representative = chain.getRepresentativeMention().mentionSpan;
		for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
			// Sentence numbers and token indexes are shifted down by one to address the lists.
			Sentence target = sentences.get(mention.sentNum - 1);
			target.replaceWords(mention.startIndex - 1, mention.endIndex - 1,
					getReplacement(mention.mentionSpan, representative));
		}
	}

	String resolved = sentences.stream().map(Object::toString).collect(Collectors.joining(" "));
	return new CoreferenceContent(text, resolved);
}

Source File: FeatureSet.java From winter with Apache License 2.0

6 votes

/**
 * Checks a cell for temporal expressions and bumps the running counter when at
 * least one is found. Blank cells and common "no value" placeholders are not
 * annotated at all.
 *
 * @param cell
 *            Content of the column's cell.
 * @param pipeline
 *            Pipeline used to annotate temporal expressions.
 * @param result
 *            Intermediate result before executing this operation.
 * @return {@code result + 1} if the cell yields at least one temporal
 *         expression, otherwise {@code result} unchanged.
 */

private int prepareSUTParser(String cell, AnnotationPipeline pipeline,
		int result) {
	String trimmed = cell.trim();
	boolean placeholder = trimmed.isEmpty()
			|| trimmed.equals("-") || trimmed.equals("--") || trimmed.equals("---")
			|| trimmed.equals("n/a") || trimmed.equals("N/A") || trimmed.equals("(n/a)")
			|| trimmed.equals("Unknown") || trimmed.equals("unknown")
			|| trimmed.equals("?") || trimmed.equals("??") || trimmed.equals(".")
			|| trimmed.equals("null") || trimmed.equals("NULL") || trimmed.equals("Null");
	if (placeholder) {
		return result;
	}

	// The untrimmed cell is annotated, with a fixed document date set on the annotation.
	Annotation annotation = new Annotation(cell);
	annotation.set(CoreAnnotations.DocDateAnnotation.class, "2013-07-14");
	pipeline.annotate(annotation);

	List<CoreMap> timexes = annotation.get(TimeAnnotations.TimexAnnotations.class);
	if (timexes != null && !timexes.isEmpty()) {
		return result + 1;
	}
	return result;
}

Source File: TintPipeline.java From tint with GNU General Public License v3.0

6 votes

/**
 * Annotates raw text and returns the annotation, with the document date and
 * the pipeline's timing information attached.
 *
 * <p>If no document date has been set yet, it is initialised to the first ten
 * characters of {@code Instant.now().toString()} and kept in the
 * {@code documentDate} field.
 *
 * @param text the text to annotate
 * @param pipeline pipeline to use, or null to build one from {@code props}
 * @return the annotated document
 */
public Annotation runRaw(String text, @Nullable StanfordCoreNLP pipeline) {
    load();

    Annotation annotation = new Annotation(text);
    LOGGER.debug("Text: {}", text);

    if (documentDate == null) {
        documentDate = Instant.now().toString().substring(0, 10);
    }
    annotation.set(CoreAnnotations.DocDateAnnotation.class, documentDate);

    // Use the caller's pipeline when given, otherwise build one from props.
    StanfordCoreNLP activePipeline = (pipeline != null) ? pipeline : new StanfordCoreNLP(props);
    activePipeline.annotate(annotation);
    annotation.set(TimingAnnotations.TimingAnnotation.class, activePipeline.timingInformation());

    return annotation;
}

Source File: NumberOfToken.java From NLIWOD with GNU Affero General Public License v3.0

6 votes

/**
 * Returns the two-word phrases of the question q that are linked by a
 * "compound" or "amod" relation in the basic dependencies, each rendered as
 * dependent followed by governor.
 *
 * @param q  a question
 * @return list of the collected phrases
 */
private ArrayList<String> getNounPhrases(String q) {
	ArrayList<String> phrases = new ArrayList<String>();

	Annotation annotated = new Annotation(q);
	PIPELINE.annotate(annotated);

	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		SemanticGraph basicDeps = sentence.get(BasicDependenciesAnnotation.class);
		for (TypedDependency dependency : basicDeps.typedDependencies()) {
			String relation = dependency.reln().toString();
			if (!relation.equals("compound") && !relation.equals("amod")) {
				continue;
			}
			// Each word's string form is cut at its last '/'
			// (presumably a "word/TAG" rendering; verify against toString()).
			String dep = dependency.dep().toString();
			String gov = dependency.gov().toString();
			phrases.add(dep.substring(0, dep.lastIndexOf("/")) + " " + gov.substring(0, gov.lastIndexOf("/")));
		}
	}
	return phrases;
}

Source File: CorefExample.java From blog-codes with Apache License 2.0

6 votes

/**
 * Runs a coreference pipeline over a three-sentence sample text and prints the
 * coreference chains, followed by the mentions of each sentence, to standard
 * output.
 *
 * @param args command-line arguments (not used)
 * @throws Exception declared by the original signature
 */
public static void main(String[] args) throws Exception {
	Annotation document = new Annotation(
			"Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");

	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	pipeline.annotate(document);

	// Document-level output: one line per coreference chain.
	System.out.println("---");
	System.out.println("coref chains");
	document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()
			.forEach(chain -> System.out.println("\t" + chain));

	// Sentence-level output: one section per sentence, one line per mention.
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)
				.forEach(mention -> System.out.println("\t" + mention));
	}
}

Source File: RelationExtractor.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Extracts "Work_For" relations from the text and maps each person to an
 * organization. Every matching relation mention is also printed to standard
 * output.
 *
 * <p>If a relation lacks a PEOPLE or ORGANIZATION argument, the empty string
 * stands in for the missing side.
 *
 * @param sentence the text to analyse
 * @return map from the PEOPLE argument value to the ORGANIZATION argument value
 */
public static HashMap<String, String> extract(String sentence) {
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    r.annotate(doc);

    HashMap<String, String> employers = new HashMap<String, String>();
    for (CoreMap annotatedSentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (RelationMention relation : annotatedSentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class)) {
        if (!relation.getType().equals("Work_For")) {
          continue;
        }
        System.out.println(relation);

        String organization = "";
        String people = "";
        for (EntityMention argument : relation.getEntityMentionArgs()) {
          String argumentType = argument.getType();
          if (argumentType.equals("ORGANIZATION")) {
            organization = argument.getValue();
          } else if (argumentType.equals("PEOPLE")) {
            people = argument.getValue();
          }
        }
        employers.put(people, organization);
      }
    }
    return employers;
}

Source File: ItalianTokenizerAnnotator.java From tint with GNU General Public License v3.0

6 votes

/**
 * Lists the annotation keys this annotator declares as satisfied.
 *
 * @return a new mutable set containing the fourteen declared annotation classes
 */
@Override public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
    Set<Class<? extends CoreAnnotation>> satisfied = new HashSet<>();

    // Text and token annotations
    satisfied.add(CoreAnnotations.TextAnnotation.class);
    satisfied.add(CoreAnnotations.TokensAnnotation.class);
    satisfied.add(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    satisfied.add(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    satisfied.add(CoreAnnotations.BeforeAnnotation.class);
    satisfied.add(CoreAnnotations.AfterAnnotation.class);
    satisfied.add(CoreAnnotations.TokenBeginAnnotation.class);
    satisfied.add(CoreAnnotations.TokenEndAnnotation.class);
    satisfied.add(CoreAnnotations.PositionAnnotation.class);
    satisfied.add(CoreAnnotations.IndexAnnotation.class);
    satisfied.add(CoreAnnotations.OriginalTextAnnotation.class);
    satisfied.add(CoreAnnotations.ValueAnnotation.class);

    // Sentence annotations
    satisfied.add(CoreAnnotations.SentencesAnnotation.class);
    satisfied.add(CoreAnnotations.SentenceIndexAnnotation.class);

    return satisfied;
}

Source File: StanfordCoreNLPTest.java From java_in_examples with Apache License 2.0

6 votes

/**
 * Runs a CoreNLP pipeline over a short sample text and prints, for every
 * sentence, its sentiment class followed by a tab and the sentence.
 *
 * @param s command-line arguments (not used)
 */
public static void main(String[] s) {
    Properties props = new Properties();
    // "sentiment" must be in the annotator list: the loop below reads
    // SentimentClass, which would otherwise be absent and print as "null".
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
    // The sentiment annotator works on binarized parse trees, so ask the parser for them.
    props.setProperty("parse.binaryTrees", "true");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}

Source File: KBPModel.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Annotates the document and collects the KBP triples whose trimmed relation
 * gloss is "per:title", "per:employee_of" or "org:top_members/employees".
 *
 * @param doc the text to analyse
 * @return map from each selected triple to the string form of its sentence
 */
public static HashMap<RelationTriple, String> extract(String doc) {
    Annotation ann = new Annotation(doc);
    pipeline.annotate(ann);

    HashMap<RelationTriple, String> selected = new HashMap<RelationTriple, String>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
            String gloss = triple.relationGloss().trim();
            switch (gloss) {
                case "per:title":
                case "per:employee_of":
                case "org:top_members/employees":
                    selected.put(triple, sentence.toString());
                    break;
                default:
                    // Any other relation is ignored.
                    break;
            }
        }
    }
    return selected;
}

Source File: ItalianReadability.java From tint with GNU General Public License v3.0

6 votes

/**
 * Registers a glossary description for a run of tokens, keyed by the run's
 * begin offset. Nothing is added when {@code start} has no entry in
 * {@code indexes} or when {@code form} has no glossary entry.
 *
 * @param form glossary key to look up
 * @param indexes map from {@code start} values to token indexes
 * @param start key looked up in {@code indexes} to find the first token
 * @param numberOfTokens number of tokens in the run
 * @param forms target map, keyed by begin character offset
 * @param annotation document supplying the token list
 * @param glossario glossary entries by form
 */
static public void addDescriptionForm(String form, HashMap<Integer, Integer> indexes, int start,
        int numberOfTokens, TreeMap<Integer, DescriptionForm> forms, Annotation annotation,
        HashMap<String, GlossarioEntry> glossario) {
    Integer firstIndex = indexes.get(start);
    if (firstIndex == null) {
        return;
    }

    // The run covers numberOfTokens tokens, so its last token sits at firstIndex + numberOfTokens - 1.
    int lastIndex = firstIndex + numberOfTokens - 1;
    CoreLabel firstToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(firstIndex);
    CoreLabel lastToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(lastIndex);
    Integer beginOffset = firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    Integer endOffset = lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

    GlossarioEntry entry = glossario.get(form);
    if (entry == null) {
        return;
    }

    forms.put(beginOffset, new DescriptionForm(beginOffset, endOffset, entry));
}

Source File: ItalianReadability.java From tint with GNU General Public License v3.0

6 votes

/**
 * Assigns a difficulty level to a content word and updates the per-level
 * counters.
 *
 * <p>The token starts at level 4. Levels 3, 2 and 1 are then checked in that
 * order. Every level whose easy-word list contains the lemma has its counter
 * incremented, and the lowest such level is the one left on the token.
 *
 * @param token the content word being added
 */
@Override public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);
    HashMap<Integer, HashMultimap<String, String>> easyWords = model.getEasyWords();
    String genericPos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);

    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    // Descending order, so a match at a lower level overwrites a higher one.
    for (int level = 3; level >= 1; level--) {
        if (!easyWords.get(level).get(genericPos).contains(lemma)) {
            continue;
        }
        switch (level) {
            case 3:
                level3WordSize++;
                break;
            case 2:
                level2WordSize++;
                break;
            default:
                level1WordSize++;
                break;
        }
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, level);
    }
}

Source File: KBPTest.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Debugging entry point for trying relation extraction from the console: each
 * line read at the "sentence> " prompt is annotated, its KBP triples are
 * written to standard error, and each sentence is written to standard output.
 *
 * @param args command-line arguments, converted to pipeline properties
 * @throws IOException declared for the console loop
 */
public static void main(String[] args) throws IOException {
  Properties config = StringUtils.argsToProperties(args);
  config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  config.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  StanfordCoreNLP nlp = new StanfordCoreNLP(config);
  IOUtils.console("sentence> ", input -> {
    Annotation annotated = new Annotation(input);
    nlp.annotate(annotated);
    for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
      // Triples go to stderr, the sentence itself to stdout.
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)
          .forEach(triple -> System.err.println(triple));
      System.out.println(sentence);
    }
  });
}

Source File: JavaReExTest.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Annotates one sample sentence, runs the relation extractor over it, and
 * prints each sentence's text followed by its relation mentions. Any exception
 * is caught and its stack trace printed.
 *
 * @param args command-line arguments, converted to properties for the relation extractor
 */
public static void main(String[] args){
    try{
      // The properties configure only the relation extractor; the pipeline
      // itself is built with its no-argument constructor.
      Properties extractorProps = StringUtils.argsToProperties(args);
      StanfordCoreNLP nlp = new StanfordCoreNLP();

      Annotation doc = new Annotation("John Gerspach was named Chief Financial Officer of Citi in July 2009.");
      nlp.annotate(doc);

      RelationExtractorAnnotator extractor = new RelationExtractorAnnotator(extractorProps);
      extractor.annotate(doc);

      for(CoreMap sentence: doc.get(CoreAnnotations.SentencesAnnotation.class)){
        System.out.println("For sentence " + sentence.get(CoreAnnotations.TextAnnotation.class));
        for(RelationMention mention: sentence.get(RelationMentionsAnnotation.class)){
          System.out.println(mention.toString());
        }
      }
    }catch(Exception e){
      e.printStackTrace();
    }
  }

Source File: RegexNerTest.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Annotates the document and returns the named-entity tag of every token, in
 * sentence and token order.
 *
 * @param doc the text to annotate
 * @return one NER tag per token
 */
public static List<String> extractNER(String doc){
    Annotation document = new Annotation(doc);

    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> result = new ArrayList<String>();
    for(CoreMap sentence: sentences) {
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token: sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            // Only the NER label is collected; the token's text and POS tag
            // were previously fetched into locals that were never used.
            result.add(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
        }
    }
    return result;
}

Source File: NerWithDepartmentTest.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Annotates the document, prints every token with its named-entity tag as
 * {@code word<TAB>tag} on standard output, and returns the tags in order.
 *
 * @param doc the text to annotate
 * @return one NER tag per token
 */
public static List<String> extractNER(String doc){
    Annotation annotated = new Annotation(doc);
    pipeline.annotate(annotated);

    List<String> tags = new ArrayList<String>();
    for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String text = token.get(CoreAnnotations.TextAnnotation.class);
            String nerTag = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            tags.add(nerTag);
            System.out.println(text + "\t" + nerTag);
        }
    }
    return tags;
}

Source File: IntelKBPModel.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Cleans the document text, annotates it, and collects the KBP triples whose
 * trimmed relation gloss is "per:title", "per:employee_of",
 * "org:top_members/employees" or "per:former_title".
 *
 * <p>Before annotation, each U+00A0 is replaced by a plain space, and U+200B,
 * U+200C, U+200D and U+FEFF are removed.
 *
 * @param doc the text to analyse
 * @return map from each selected triple to the string form of its sentence
 */
public static HashMap<RelationTriple, String> extract(String doc) {
    String cleaned = doc
            .replaceAll("\u00a0", " ")
            .replaceAll("\u200B|\u200C|\u200D|\uFEFF", "");
    Annotation ann = new Annotation(cleaned);
    pipeline.annotate(ann);

    HashMap<RelationTriple, String> selected = new HashMap<RelationTriple, String>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
            String gloss = triple.relationGloss().trim();
            switch (gloss) {
                case "per:title":
                case "per:employee_of":
                case "org:top_members/employees":
                case "per:former_title":
                    selected.put(triple, sentence.toString());
                    break;
                default:
                    // Any other relation is ignored.
                    break;
            }
        }
    }
    return selected;
}

Source File: IntelKBPAnnotator.java From InformationExtraction with GNU General Public License v3.0

6 votes

/**
 * Finds every token that {@code kbpIsPronominalMention} accepts, wraps it in a
 * single-token chunk, and records that chunk both in its sentence's mention
 * list and in the returned list.
 *
 * @param ann The document.
 * @return The list of pronominal mentions in the document.
 */
private static List<CoreMap> annotatePronominalMentions(Annotation ann) {
    List<CoreMap> found = new ArrayList<>();
    int sentenceIndex = 0;
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        // A sentence without a token-begin annotation gets offset 0.
        Integer declaredBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
        Integer tokenOffset = (declaredBegin != null) ? declaredBegin : Integer.valueOf(0);

        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (int i = 0; i < tokens.size(); i++) {
            if (!kbpIsPronominalMention(tokens.get(i))) {
                continue;
            }
            // The chunk spans exactly one token: [i, i + 1).
            CoreMap pronoun = ChunkAnnotationUtils.getAnnotatedChunk(tokens, i, i + 1,
                    tokenOffset, null, CoreAnnotations.TextAnnotation.class, null);
            pronoun.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
            sentence.get(CoreAnnotations.MentionsAnnotation.class).add(pronoun);
            found.add(pronoun);
        }
        sentenceIndex++;
    }

    return found;
}

Source File: CoreNLPCache.java From phrasal with GNU General Public License v3.0

6 votes

/**
 * Loads serialized CoreNLP annotations from a file and replaces the
 * {@code annotationMap} cache with the file's sentences, keyed by zero-based
 * line index.
 *
 * @param filename file holding the serialized annotation
 * @return the largest 1-indexed line number found, plus one
 * @throws RuntimeException if the deserialized annotation has no sentence list
 */
public static int loadSerialized(String filename) {
  Annotation annotation = IOTools.deserialize(filename, Annotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null) {
    throw new RuntimeException("Unusable annotation (no sentences) in " + filename);
  }

  annotationMap = new HashMap<Integer,CoreMap>(sentences.size());
  int highestLineId = 0;
  for (CoreMap sentence : sentences) {
    // Line numbers are 1-indexed; the cache is keyed from zero.
    int lineId = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
    highestLineId = Math.max(highestLineId, lineId);
    annotationMap.put(lineId - 1, sentence);
  }
  return highestLineId + 1;
}

edu.stanford.nlp.ling.CoreAnnotations Java Examples