edu.stanford.nlp.pipeline.Annotation Java Examples
The following examples show how to use
edu.stanford.nlp.pipeline.Annotation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example #1
Source File: ConstituentExample.java From blog-codes with Apache License 2.0 | 7 votes |
public static void main(String[] args) { // set up pipeline properties Properties props = new Properties(); props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse"); // use faster shift reduce parser //props.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz"); props.setProperty("parse.maxlen", "100"); // set up Stanford CoreNLP pipeline StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // build annotation for a review Annotation annotation = new Annotation("The small red car turned very quickly around the corner."); // annotate pipeline.annotate(annotation); // get tree Tree tree = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0) .get(TreeCoreAnnotations.TreeAnnotation.class); System.out.println(tree); Set<Constituent> treeConstituents = tree.constituents(new LabeledScoredConstituentFactory()); for (Constituent constituent : treeConstituents) { if (constituent.label() != null && (constituent.label().toString().equals("VP") || constituent.label().toString().equals("NP"))) { System.err.println("found constituent: " + constituent.toString()); System.err.println(tree.getLeaves().subList(constituent.start(), constituent.end() + 1)); } } }
Example #2
Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License | 6 votes |
public SemanticGraph getBasicDependencies (String s) { // create an empty Annotation just with the given text Annotation document = new Annotation(s); // run all Annotators on this text pipeline_lemma.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for(CoreMap sentence: sentences) { // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class); return dependencies; } return null; }
Example #3
Source File: JavaClient.java From blog-codes with Apache License 2.0 | 6 votes |
public static void main(String[] args) { // creates a StanfordCoreNLP object with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.setProperty("annotators", "tokenize,ssplit,pos,ner,depparse,openie"); MultiLangsStanfordCoreNLPClient pipeline = new MultiLangsStanfordCoreNLPClient(props, "http://localhost", 9000, 2, null, null, "zh"); // read some text in the text variable String text = "今天天气很好。"; // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); CoreMap firstSentence = document.get(CoreAnnotations.SentencesAnnotation.class).get(0); // this for loop will print out all of the tokens and the character offset info for (CoreLabel token : firstSentence.get(CoreAnnotations.TokensAnnotation.class)) { System.out.println(token.word() + "\t" + token.beginPosition() + "\t" + token.endPosition()); } }
Example #4
Source File: SentimentAnalyzer.java From blog-codes with Apache License 2.0 | 6 votes |
/**
 * Runs the sentiment pipeline over {@code text} and returns the scores;
 * when the text has several sentences, the last one's scores win
 * (each loop iteration overwrites the previous results).
 */
public SentimentResult getSentimentResult(String text) {
    SentimentClassification classification = new SentimentClassification();
    SentimentResult result = new SentimentResult();
    if (text == null || text.length() == 0) {
        return result;
    }
    Annotation annotation = pipeline.process(text);
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);
        // class probabilities, scaled to whole percentages
        classification.setVeryNegative((double) Math.round(predictions.get(0) * 100d));
        classification.setNegative((double) Math.round(predictions.get(1) * 100d));
        classification.setNeutral((double) Math.round(predictions.get(2) * 100d));
        classification.setPositive((double) Math.round(predictions.get(3) * 100d));
        classification.setVeryPositive((double) Math.round(predictions.get(4) * 100d));
        String sentimentType = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        result.setSentimentType(sentimentType);
        result.setSentimentClass(classification);
        result.setSentimentScore(RNNCoreAnnotations.getPredictedClass(tree));
    }
    return result;
}
Example #5
Source File: StanfordCoreNLPTest.java From java_in_examples with Apache License 2.0 | 6 votes |
public static void main(String[] s) { Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" + "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" + "\"How do you know I am mad?\" said Alice.\n" + "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting"; // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); for (CoreMap sentence : sentences) { String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class); System.out.println(sentiment + "\t" + sentence); } }
Example #6
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
/**
 * Tags {@code theSentence} with POS labels and prints word/tag pairs, then the
 * document as XML and pretty-printed text.
 *
 * Fixes: (1) Properties values must be Strings — putting an Integer makes
 * getProperty("pos.maxlen") return null, silently dropping the length limit;
 * (2) xmlPrint/prettyPrint dump the WHOLE document, so they belong outside the
 * per-sentence loop (the original reprinted the document once per sentence).
 */
private static void usingStanfordPOSTagger() {
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos");
    props.put("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    props.put("pos.maxlen", "10");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();
    }
    // print the annotated document once, in both formats
    try {
        pipeline.xmlPrint(document, System.out);
        pipeline.prettyPrint(document, System.out);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
Example #7
Source File: StanfordTokenizer.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Tokenizes the CAS document text with the CoreNLP pipeline matching its
 * language and adds Sentence/Token annotations with character offsets.
 *
 * @throws AnalysisEngineProcessException when the document language has no pipeline
 */
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException {
    String text = aJCas.getDocumentText();
    Annotation document = new Annotation(text);
    // pick the pipeline matching the document language, or fail
    if (!languageMap.containsKey(aJCas.getDocumentLanguage())) {
        throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
    }
    StanfordCoreNLP stanfordCoreNLP = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];
    stanfordCoreNLP.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // one Sentence annotation per CoreNLP sentence span
        int begin = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int end = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        Sentence jsentence = new Sentence(aJCas, begin, end);
        jsentence.addToIndexes();
        // one Token annotation per CoreNLP token
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Token casToken = new Token(aJCas,
                    token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            casToken.addToIndexes();
        }
    }
}
Example #8
Source File: StanfordRNNDParser.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Dependency-parses each sentence converted from the CAS, stores the enhanced
 * dependency graph back on the sentence, and prints the sorted edges.
 */
@Override public void process(JCas jCas) throws AnalysisEngineProcessException {
    mappingProvider.configure(jCas.getCas());
    DKPro2CoreNlp converter = new DKPro2CoreNlp();
    Annotation annotations = converter.convert(jCas, new Annotation());
    List<CoreMap> sentences = annotations.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        GrammaticalStructure gs = parser.predict(sentence);
        SemanticGraph semanticGraph = SemanticGraphFactory.makeFromTree(
                gs, SemanticGraphFactory.Mode.CCPROCESSED, GrammaticalStructure.Extras.MAXIMAL, null);
        semanticGraph.prettyPrint();
        // map UD relation names back to the English dependency scheme
        semanticGraph = semanticGraphUniversalEnglishToEnglish(semanticGraph);
        sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, semanticGraph);
        for (SemanticGraphEdge edge : semanticGraph.edgeListSorted()) {
            System.out.println(edge);
        }
    }
    convertDependencies(jCas, annotations, true);
}
Example #9
Source File: CorefExample.java From blog-codes with Apache License 2.0 | 6 votes |
/** Annotates a short text and prints its coreference chains and mentions. */
public static void main(String[] args) throws Exception {
    Annotation document = new Annotation(
            "Barack Obama was born in Hawaii. He is the president. Obama was elected in 2008.");
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);
    // one line per coreference chain
    System.out.println("---");
    System.out.println("coref chains");
    for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
        System.out.println("\t" + chain);
    }
    // then, per sentence, every mention found in it
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println("---");
        System.out.println("mentions");
        for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
            System.out.println("\t" + mention);
        }
    }
}
Example #10
Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * How to use: * for (CoreLabel token : sentence.get(TokensAnnotation.class)) { * // this is the text of the token * String word = token.get(TextAnnotation.class); * // this is the POS tag of the token * String pos = token.get(PartOfSpeechAnnotation.class); * } * @param s * @return */ public CoreMap getPOS (String s) { // create an empty Annotation just with the given text Annotation document = new Annotation(s); // run all Annotators on this text pipeline_lemma.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for(CoreMap sentence: sentences) { // this is the sentence with POS Tags return sentence; } return null; }
Example #11
Source File: ReconTool.java From Criteria2Query with Apache License 2.0 | 6 votes |
public boolean isCEE(String text){ text = text.replace("/", " / "); Annotation annotation = new Annotation(text); pipeline.annotate(annotation); List<CoreMap> sentences = annotation.get(SentencesAnnotation.class); boolean flag=false; for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class);//token.get(LemmaAnnotation.class);//TextAnnotation.class String pos = token.get(PartOfSpeechAnnotation.class); //String lemma = token.get(LemmaAnnotation.class); boolean f = false; if ((word.equals("and") || word.equals(",") || word.equals("/") || word.equals("or"))) { flag = true; break; } } } return flag; }
Example #12
Source File: CorefTool.java From Criteria2Query with Apache License 2.0 | 6 votes |
/** Runs coreference resolution over a fixed sample text and prints chains and mentions. */
public void extractCoref() {
    String s="Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
    Annotation document = new Annotation(s);
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);
    // one line per coreference chain
    System.out.println("---");
    System.out.println("coref chains");
    for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
        System.out.println("\t" + chain);
    }
    // then, per sentence, every mention found in it
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println("---");
        System.out.println("mentions");
        for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
            System.out.println("\t" + mention);
        }
    }
}
Example #13
Source File: CoreNLPCache.java From phrasal with GNU General Public License v3.0 | 6 votes |
/** * Load serialized CoreNLP annotations from a file. * * @param filename */ public static int loadSerialized(String filename) { Annotation annotation = IOTools.deserialize(filename, Annotation.class); List<CoreMap> sentenceList = annotation.get(CoreAnnotations.SentencesAnnotation.class); if (sentenceList == null) { throw new RuntimeException("Unusable annotation (no sentences) in " + filename); } annotationMap = new HashMap<Integer,CoreMap>(sentenceList.size()); int maxLineId = 0; for (CoreMap annotationSet : sentenceList) { // 1-indexed int lineId = annotationSet.get(CoreAnnotations.LineNumberAnnotation.class); maxLineId = lineId > maxLineId ? lineId : maxLineId; annotationMap.put(lineId-1, annotationSet); } return maxLineId + 1; }
Example #14
Source File: ComparisonUtils.java From NLIWOD with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Retrieves words with a given part of speech from the given string.
 * JJR selects comparatives, JJS superlatives.
 * @param question String to retrieve words from.
 * @param tag JJR for comparatives and JJS for superlatives.
 * @return List of the retrieved words, or null when an argument is null.
 */
private ArrayList<String> getWords(String question, String tag) {
    if (question == null || tag == null) {
        return null;
    }
    Annotation annotation = new Annotation(question);
    PIPELINE.annotate(annotation);
    ArrayList<String> words = new ArrayList<String>();
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            if (token.tag().startsWith(tag)) {
                // CoreLabel.toString() is "word-index"; strip the index suffix
                String word = token.toString();
                words.add(word.substring(0, word.lastIndexOf("-")));
            }
        }
    }
    return words;
}
Example #15
Source File: NumberOfToken.java From NLIWOD with GNU Affero General Public License v3.0 | 6 votes |
/***
 * Returns a list of all noun phrases of the question q.
 * @param q a question
 * @return list of noun phrases
 */
private ArrayList<String> getNounPhrases(String q) {
    ArrayList<String> nounPhrases = new ArrayList<String>();
    Annotation annotation = new Annotation(q);
    PIPELINE.annotate(annotation);
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph basicDeps = sentence.get(BasicDependenciesAnnotation.class);
        // "compound" and "amod" edges mark multi-word noun phrases
        for (TypedDependency dependency : basicDeps.typedDependencies()) {
            String relation = dependency.reln().toString();
            if (relation.equals("compound") || relation.equals("amod")) {
                String dep = dependency.dep().toString();
                String gov = dependency.gov().toString();
                // node labels are "word/POS"; strip the POS suffix
                nounPhrases.add(dep.substring(0, dep.lastIndexOf("/")) + " "
                        + gov.substring(0, gov.lastIndexOf("/")));
            }
        }
    }
    return nounPhrases;
}
Example #16
Source File: Postprocess.java From phrases with Apache License 2.0 | 6 votes |
/**
 * Runs the sentiment pipeline over every pattern's sentences.
 *
 * NOTE(review): the per-sentence sentiment and per-token lemmas are computed
 * but discarded, and the method always returns null — preserved as-is to
 * keep the original behavior; confirm with callers before changing.
 */
public List<Pattern> run(List<Pattern> patterns) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (Pattern pattern : patterns) {
        Annotation annotation = pipeline.process(pattern.toSentences());
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
            int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            }
        }
    }
    return null;
}
Example #17
Source File: RelationExtractor.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
/** Extracts Work_For relation mentions, mapping person name to organization name. */
public static HashMap<String, String> extract(String sentence) {
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    r.annotate(doc);
    HashMap<String, String> map = new HashMap<String, String>();
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<RelationMention> mentions = s.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
        for (RelationMention mention : mentions) {
            if (!mention.getType().equals("Work_For")) {
                continue;
            }
            System.out.println(mention);
            // collect the two entity arguments of the relation
            String organization = "";
            String people = "";
            for (EntityMention entity : mention.getEntityMentionArgs()) {
                if (entity.getType().equals("ORGANIZATION")) {
                    organization = entity.getValue();
                }
                if (entity.getType().equals("PEOPLE")) {
                    people = entity.getValue();
                }
            }
            map.put(people, organization);
        }
    }
    return map;
}
Example #18
Source File: IntelKBPModel.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
/** Extracts employment-related KBP triples, mapping each triple to its sentence text. */
public static HashMap<RelationTriple, String> extract(String doc) {
    // strip non-breaking spaces and zero-width characters before annotating
    Annotation ann = new Annotation(doc
            .replaceAll("\u00a0", " ")
            .replaceAll("\u200B|\u200C|\u200D|\uFEFF", ""));
    pipeline.annotate(ann);
    HashMap<RelationTriple, String> relations = new HashMap<RelationTriple, String>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
            String gloss = triple.relationGloss().trim();
            // keep only title/employment relations
            if (gloss.equals("per:title")
                    || gloss.equals("per:employee_of")
                    || gloss.equals("org:top_members/employees")
                    || gloss.equals("per:former_title")) {
                relations.put(triple, sentence.toString());
            }
        }
    }
    return relations;
}
Example #19
Source File: RegexNerTest.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
public static List<String> extractNER(String doc){ Annotation document = new Annotation(doc); pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); List<String> result = new ArrayList<String>(); for(CoreMap sentence: sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods for (CoreLabel token: sentence.get(CoreAnnotations.TokensAnnotation.class)) { // this is the text of the token String word = token.get(CoreAnnotations.TextAnnotation.class); // this is the POS tag of the token String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); // this is the NER label of the token String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class); result.add(ne); } } return result; }
Example #20
Source File: KBPTest.java From InformationExtraction with GNU General Public License v3.0 | 6 votes |
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    // read sentences from the console; triples go to stderr, sentences to stdout
    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
            System.out.println(sentence);
        }
    });
}
Example #21
Source File: Trees.java From uncc2014watsonsim with GNU General Public License v2.0 | 6 votes |
public static List<CoreMap> parse(String text) { // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); List<Tree> trees = new ArrayList<>(); List<Tree> dependencies = new ArrayList<>(); for(CoreMap sentence: sentences) { // this is the parse tree of the current sentence Tree t = sentence.get(TreeAnnotation.class); SemanticGraph graph = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); trees.add(t); } return sentences; }
Example #22
Source File: StanfordCoref.java From Graphene with GNU General Public License v3.0 | 6 votes |
@Override public CoreferenceContent doCoreferenceResolution(String text) { Annotation document = new Annotation(text); PIPELINE.annotate(document); // extract sentences List<Sentence> sentences = new ArrayList<>(); for (CoreMap coreMap : document.get(CoreAnnotations.SentencesAnnotation.class)) { Sentence sentence = new Sentence(); for (CoreLabel coreLabel : coreMap.get(CoreAnnotations.TokensAnnotation.class)) { sentence.addWord(coreLabel.word()); } sentences.add(sentence); } // replace coreferences for (CorefChain cc : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) { String coreMention = cc.getRepresentativeMention().mentionSpan; for (CorefChain.CorefMention corefMention : cc.getMentionsInTextualOrder()) { sentences.get(corefMention.sentNum-1).replaceWords(corefMention.startIndex-1, corefMention.endIndex-1, getReplacement(corefMention.mentionSpan, coreMention)); } } return new CoreferenceContent(text, sentences.stream().map(s -> s.toString()).collect(Collectors.joining(" "))); }
Example #23
Source File: Phrase.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
private static List<Tree> _trees(Phrase p) { // create an empty Annotation just with the given text Annotation document = p.memo(Phrase.coreNLP); try{ // Run the full parse on this text constituency_parse_pipeline.annotate(document); } catch (IllegalArgumentException | NullPointerException ex) { /* * On extremely rare occasions (< 0.00000593% of passages) * it will throw an error like the following: * * Exception in thread "main" java.lang.IllegalArgumentException: * No head rule defined for SYM using class edu.stanford.nlp.trees.SemanticHeadFinder in SYM-10 * * On more frequent occasions, you get the following: * Exception in thread "main" java.lang.NullPointerException * at edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder.findHead(RuleBasedCorefMentionFinder.java:276) * * Both of these are fatal for the passage. * Neither are a big deal for the index. Forget them. */ } return p.memo(Phrase.sentences) .stream() .map(s -> s.get(TreeAnnotation.class)) .filter(Objects::nonNull) .collect(toList()); }
Example #24
Source File: ItalianStandardReadability.java From tint with GNU General Public License v3.0 | 5 votes |
/** Configures the POS tag sets and human-readable POS descriptions for Italian. */
public ItalianStandardReadability(Properties globalProperties, Properties localProperties, Annotation annotation) {
    super(globalProperties, localProperties, annotation);
    // content-bearing tags: nouns, adjectives, verbs, adverbs
    contentPosList.add("S");
    contentPosList.add("A");
    contentPosList.add("V");
    contentPosList.add("B");
    // tags counted as "simple" words
    simplePosList.add("S");
    simplePosList.add("V");
    // punctuation is not a word
    nonWordPosList.add("F");
    // one-letter tag -> English description
    genericPosDescription.put("A", "Adjective");
    genericPosDescription.put("B", "Adverb");
    genericPosDescription.put("S", "Noun");
    genericPosDescription.put("E", "Preposition");
    genericPosDescription.put("C", "Conjunction");
    genericPosDescription.put("P", "Pronoun");
    genericPosDescription.put("R", "Determiner");
    genericPosDescription.put("F", "Punctuation");
    genericPosDescription.put("D", "Adj. (det.)");
    genericPosDescription.put("V", "Verb");
    genericPosDescription.put("X", "Other");
    genericPosDescription.put("N", "Number");
}
Example #25
Source File: JsonPipeline.java From tac2015-event-detection with GNU General Public License v3.0 | 5 votes |
List getCorefInfo(Annotation doc) { Map<Integer, CorefChain> corefChains = doc.get(CorefChainAnnotation.class); // List<CoreMap> sentences = doc.get(SentencesAnnotation.class); List entities = new ArrayList(); for (CorefChain chain : corefChains.values()) { List mentions = new ArrayList(); CorefChain.CorefMention representative = chain.getRepresentativeMention(); for (CorefChain.CorefMention corement : chain.getMentionsInTextualOrder()) { Map outment = new HashMap(); outment.put("sentence", corement.sentNum-1); outment.put("tokspan_in_sentence", Lists.newArrayList( corement.startIndex-1, corement.endIndex-1)); outment.put("head",corement.headIndex-1); outment.put("gender", corement.gender.toString()); outment.put("animacy", corement.animacy.toString()); outment.put("number", corement.number.toString()); outment.put("mentiontype", corement.mentionType.toString()); outment.put("mentionid", corement.mentionID); if (representative!=null && corement.mentionID==representative.mentionID) { outment.put("representative", true); } mentions.add(outment); } Map entity = ImmutableMap.builder() .put("mentions", mentions) .put("entityid", chain.getChainID()) .build(); entities.add(entity); } return entities; }
Example #26
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that the custom stopword list words * @throws Exception */ @org.junit.Test public void testCustomStopwordList() throws Exception { //setup coreNlp properties for stopwords. Note the custom stopword list property Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList); //get the custom stopword set Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } //not checking lemma, so always false assertFalse(stopword.second()); } }
Example #27
Source File: JsonPipeline.java From tac2015-event-detection with GNU General Public License v3.0 | 5 votes |
/** runs the corenlp pipeline with all options, and returns all results as a JSON object. */ @SuppressWarnings({ "rawtypes", "unchecked" }) JsonNode processTextDocument(String doctext) { if (startMilli==-1) startMilli = System.currentTimeMillis(); numDocs++; numChars += doctext.length(); Annotation document = new Annotation(doctext); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); List<Map> outSentences = Lists.newArrayList(); for(CoreMap sentence: sentences) { Map<String,Object> sent_info = Maps.newHashMap(); addTokenBasics(sent_info, sentence); numTokens += ((List) sent_info.get("tokens")).size(); for (String annotator : annotators()) { addAnnoToSentenceObject(sent_info, sentence, annotator); } outSentences.add(sent_info); } ImmutableMap.Builder b = new ImmutableMap.Builder(); // b.put("text", doctext); b.put("sentences", outSentences); if (Lists.newArrayList(annotators()).contains("dcoref")) { List outCoref = getCorefInfo(document); b.put("entities", outCoref); } Map outDoc = b.build(); return JsonUtil.toJson(outDoc); }
Example #28
Source File: VerbTest.java From tint with GNU General Public License v3.0 | 5 votes |
public static void main(String[] args) { try { Properties properties = new Properties(); properties.setProperty("annotators", "ita_toksent, udpipe, ita_verb"); properties.setProperty("customAnnotatorClass.udpipe", "eu.fbk.fcw.udpipe.api.UDPipeAnnotator"); properties.setProperty("customAnnotatorClass.ita_toksent", "eu.fbk.dh.tint.tokenizer.annotators.ItalianTokenizerAnnotator"); properties.setProperty("customAnnotatorClass.ita_verb", "eu.fbk.dh.tint.verb.VerbAnnotator"); properties.setProperty("udpipe.server", "gardner"); properties.setProperty("udpipe.port", "50020"); properties.setProperty("udpipe.keepOriginal", "1"); // properties.setProperty("udpipe.model", "/Users/alessio/Desktop/model"); StanfordCoreNLP pipeline = new StanfordCoreNLP(properties); Annotation annotation = new Annotation("Il caporale alpino Giampietro Civati caduto in combattimento il 5 dicembre 1944, come racconta Silvestri, ha scritto questo mirabile testamento: «sono figlio d’Italia, d’anni 21, non di Graziani e nemmeno di Badoglio, ma sono italiano e seguo la via che salverà l’onore d’Italia»."); pipeline.annotate(annotation); String out = JSONOutputter.jsonPrint(annotation); System.out.println(out); // for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { // System.out.println(sentence.get(VerbAnnotations.VerbsAnnotation.class)); // } } catch (Exception e) { e.printStackTrace(); } }
Example #29
Source File: DigiCompMorphAnnotator.java From tint with GNU General Public License v3.0 | 5 votes |
/**
 * Attaches compound-morphology annotations to each token: among the token's
 * space-separated morphological analyses, keeps those anchored on the token's
 * lemma (prefix "lemma+" or "lemma~").
 */
@Override public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String[] features = token.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            if (features.length > 1) {
                // several analyses: keep the matching ones (possibly none)
                List<String> comps = new ArrayList<>();
                for (String feature : features) {
                    if (feature.startsWith(lemma + "+") || feature.startsWith(lemma + "~")) {
                        comps.add(feature);
                    }
                }
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
            } else if (features[0].startsWith(lemma + "+") || features[0].startsWith(lemma + "~")) {
                // single analysis: only annotate when it matches the lemma
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class,
                        new ArrayList<String>(Arrays.asList(features[0])));
            }
        }
    }
}
Example #30
Source File: ItalianTokenizerAnnotator.java From tint with GNU General Public License v3.0 | 5 votes |
/**
 * Given an Annotation, perform a task on this Annotation: tokenize and
 * sentence-split its text, then attach the basic annotations.
 *
 * @param annotation the document to tokenize
 */
@Override public void annotate(Annotation annotation) {
    String text = annotation.get(CoreAnnotations.TextAnnotation.class);
    // tokenizer returns one CoreLabel list per sentence
    List<List<CoreLabel>> sentenceTokens = tokenizer.parse(
            text, newlineIsSentenceBreak, tokenizeOnlyOnSpace, ssplitOnlyOnNewLine);
    Utils.addBasicAnnotations(annotation, sentenceTokens, text);
}