Java Code Examples for edu.stanford.nlp.util.CoreMap#get()
The following examples show how to use edu.stanford.nlp.util.CoreMap#get(). They are drawn from open-source projects; the originating project, source file, and license are noted above each example.
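All of the examples share one pattern: a CoreMap is a heterogeneous map keyed by annotation classes, so get() takes a class object (e.g. CoreAnnotations.TokensAnnotation.class) and returns a value of the type associated with that key. As a quick orientation, here is a minimal, self-contained sketch of that pattern; it assumes a standard StanfordCoreNLP pipeline running only the tokenize and ssplit annotators, and the class name CoreMapGetDemo is illustrative.

import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class CoreMapGetDemo {
    public static void main(String[] args) {
        // a minimal pipeline: tokenization and sentence splitting only
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("CoreMap is a typed map. Keys are class objects.");
        pipeline.annotate(document);

        // CoreMap#get takes an annotation class as the key and returns
        // a value of the type that class declares
        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                System.out.println(token.get(CoreAnnotations.TextAnnotation.class));
            }
        }
    }
}

Each project example below is a variation on this lookup; they differ mainly in which annotators run and which annotation keys are read back.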
Example 1
Source File: NumberOfToken.java From NLIWOD with GNU Affero General Public License v3.0
/**
 * Returns a list of all noun phrases of the question q.
 * @param q a question
 * @return list of noun phrases
 */
private ArrayList<String> getNounPhrases(String q) {
    ArrayList<String> nounP = new ArrayList<String>();
    Annotation annotation = new Annotation(q);
    PIPELINE.annotate(annotation);
    List<CoreMap> question = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : question) {
        SemanticGraph basicDeps = sentence.get(BasicDependenciesAnnotation.class);
        Collection<TypedDependency> typedDeps = basicDeps.typedDependencies();
        Iterator<TypedDependency> dependencyIterator = typedDeps.iterator();
        while (dependencyIterator.hasNext()) {
            TypedDependency dependency = dependencyIterator.next();
            String depString = dependency.reln().toString();
            if (depString.equals("compound") || depString.equals("amod")) {
                String dep = dependency.dep().toString();
                String gov = dependency.gov().toString();
                nounP.add(dep.substring(0, dep.lastIndexOf("/")) + " "
                        + gov.substring(0, gov.lastIndexOf("/")));
            }
        }
    }
    return nounP;
}
Example 2
Source File: StanfordCoref.java From Graphene with GNU General Public License v3.0
@Override
public CoreferenceContent doCoreferenceResolution(String text) {
    Annotation document = new Annotation(text);
    PIPELINE.annotate(document);

    // extract sentences
    List<Sentence> sentences = new ArrayList<>();
    for (CoreMap coreMap : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        Sentence sentence = new Sentence();
        for (CoreLabel coreLabel : coreMap.get(CoreAnnotations.TokensAnnotation.class)) {
            sentence.addWord(coreLabel.word());
        }
        sentences.add(sentence);
    }

    // replace coreferences
    for (CorefChain cc : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
        String coreMention = cc.getRepresentativeMention().mentionSpan;
        for (CorefChain.CorefMention corefMention : cc.getMentionsInTextualOrder()) {
            sentences.get(corefMention.sentNum - 1).replaceWords(
                    corefMention.startIndex - 1,
                    corefMention.endIndex - 1,
                    getReplacement(corefMention.mentionSpan, coreMention));
        }
    }

    return new CoreferenceContent(text,
            sentences.stream().map(s -> s.toString()).collect(Collectors.joining(" ")));
}
Example 3
Source File: CoreNLPCache.java From phrasal with GNU General Public License v3.0
/**
 * Load serialized CoreNLP annotations from a file.
 *
 * @param filename
 */
public static int loadSerialized(String filename) {
    Annotation annotation = IOTools.deserialize(filename, Annotation.class);
    List<CoreMap> sentenceList = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentenceList == null) {
        throw new RuntimeException("Unusable annotation (no sentences) in " + filename);
    }
    annotationMap = new HashMap<Integer,CoreMap>(sentenceList.size());
    int maxLineId = 0;
    for (CoreMap annotationSet : sentenceList) {
        // 1-indexed
        int lineId = annotationSet.get(CoreAnnotations.LineNumberAnnotation.class);
        maxLineId = lineId > maxLineId ? lineId : maxLineId;
        annotationMap.put(lineId - 1, annotationSet);
    }
    return maxLineId + 1;
}
Example 4
Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License
public Tree getParseTree(String text) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline_lemma.annotate(document);

    // these are all the sentences in this document;
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
        // return the parse tree of the first sentence
        return sentence.get(TreeAnnotation.class);
    }
    return null;
}
Example 5
Source File: ParserAnnotatorUtils.java From Heracles with GNU General Public License v3.0
/**
 * Set the tags of the original tokens and the leaves if they
 * aren't already set.
 */
public static void setMissingTags(CoreMap sentence, Tree tree) {
    List<TaggedWord> taggedWords = null;
    List<Label> leaves = null;
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (int i = 0, size = tokens.size(); i < size; ++i) {
        CoreLabel token = tokens.get(i);
        if (token.tag() == null) {
            if (taggedWords == null) {
                taggedWords = tree.taggedYield();
            }
            if (leaves == null) {
                leaves = tree.yield();
            }
            token.setTag(taggedWords.get(i).tag());
            Label leaf = leaves.get(i);
            if (leaf instanceof HasTag) {
                ((HasTag) leaf).setTag(taggedWords.get(i).tag());
            }
        }
    }
}
Example 6
Source File: NerWithDepartmentTest.java From InformationExtraction with GNU General Public License v3.0
public static List<String> extractNER(String doc) {
    Annotation document = new Annotation(doc);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> result = new ArrayList<String>();
    for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence;
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            // this is the text of the token
            String word = token.get(CoreAnnotations.TextAnnotation.class);
            // this is the NER label of the token
            String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            result.add(ne);
            System.out.println(word + "\t" + ne);
        }
    }
    return result;
}
Example 7
Source File: StanfordTokenizer.java From ambiverse-nlu with Apache License 2.0
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    String text = aJCas.getDocumentText();
    Annotation document = new Annotation(text);

    // pick the pipeline that matches the document language
    StanfordCoreNLP stanfordCoreNLP;
    if (!languageMap.containsKey(aJCas.getDocumentLanguage())) {
        throw new AnalysisEngineProcessException(
                new LanguageNotSupportedException("Language Not Supported"));
    }
    stanfordCoreNLP = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];
    stanfordCoreNLP.annotate(document);

    // map each sentence and token to UIMA annotations via character offsets
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        int sstart = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int ssend = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        Sentence jsentence = new Sentence(aJCas, sstart, ssend);
        jsentence.addToIndexes();

        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Token casToken = new Token(aJCas,
                    token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            casToken.addToIndexes();
        }
    }
}
Example 8
Source File: JavaClient.java From blog-codes with Apache License 2.0
public static void main(String[] args) {
    // creates a CoreNLP client with tokenization, sentence splitting, POS tagging, NER,
    // dependency parsing, and Open IE
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,ner,depparse,openie");
    MultiLangsStanfordCoreNLPClient pipeline =
            new MultiLangsStanfordCoreNLPClient(props, "http://localhost", 9000, 2, null, null, "zh");

    // read some text in the text variable
    String text = "今天天气很好。";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    CoreMap firstSentence = document.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    // this for loop will print out all of the tokens and the character offset info
    for (CoreLabel token : firstSentence.get(CoreAnnotations.TokensAnnotation.class)) {
        System.out.println(token.word() + "\t" + token.beginPosition() + "\t" + token.endPosition());
    }
}
Example 9
Source File: SentimentAnalyzer.java From blog-codes with Apache License 2.0
public SentimentResult getSentimentResult(String text) {
    SentimentClassification classification = new SentimentClassification();
    SentimentResult sentimentResult = new SentimentResult();
    if (text != null && text.length() > 0) {
        Annotation annotation = pipeline.process(text);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            // class probabilities for: very negative, negative, neutral, positive, very positive
            SimpleMatrix simpleMatrix = RNNCoreAnnotations.getPredictions(tree);
            classification.setVeryNegative((double) Math.round(simpleMatrix.get(0) * 100d));
            classification.setNegative((double) Math.round(simpleMatrix.get(1) * 100d));
            classification.setNeutral((double) Math.round(simpleMatrix.get(2) * 100d));
            classification.setPositive((double) Math.round(simpleMatrix.get(3) * 100d));
            classification.setVeryPositive((double) Math.round(simpleMatrix.get(4) * 100d));

            String sentimentType = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
            sentimentResult.setSentimentType(sentimentType);
            sentimentResult.setSentimentClass(classification);
            sentimentResult.setSentimentScore(RNNCoreAnnotations.getPredictedClass(tree));
        }
    }
    return sentimentResult;
}
Example 10
Source File: CoreNLPParser.java From Heracles with GNU General Public License v3.0
@Override
public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit) {
    Properties prop1 = new Properties();
    prop1.setProperty("annotators", "parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);

    for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)) {
        HashMap<Integer, Word> wordIndex = new HashMap<>();
        Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
        // Annotation a = new Annotation((String)span.getAnnotations().get("text"));
        if (a == null) {
            System.out.println(a);
        }
        pipeline.annotate(a);
        for (CoreMap sentence : a.get(SentencesAnnotation.class)) {
            // per sentence, check the syntax tree
            Tree tree = sentence.get(TreeAnnotation.class);
            // tree.percolateHeadAnnotations(headFinder);
            // tree.indentedListPrint();
            try {
                analyzeTree(tree, span, wordIndex);
            } catch (IllegalSpanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }
}
Example 11
Source File: DigiInverseMorphAnnotator.java From tint with GNU General Public License v3.0
public void annotate(Annotation annotation) {
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            for (CoreLabel c : tokens) {
                // keep only the first space-separated field of the morphological analysis
                String[] morph_features = c.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
                c.set(DigiInverseMorphAnnotations.InverseMorphoAnnotation.class, morph_features[0]);
            }
        }
    }
}
Example 12
Source File: CoreNLPNamedEntityRecognizer.java From Heracles with GNU General Public License v3.0
/**
 * Process the Dataset in chunks, as defined by the <code>spanType</code> parameter.
 * The Spans denoted by spanType must each contain Words belonging to a single sentence.
 */
@Override
public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit) {
    Properties prop1 = new Properties();
    prop1.setProperty("annotators", "ner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);

    for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)) {
        HashMap<Integer, Word> wordIndex = new HashMap<>();
        Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
        if (a == null) {
            System.out.println(a);
        }
        pipeline.annotate(a);
        List<CoreMap> sentenceAnnotations = a.get(SentencesAnnotation.class);
        for (CoreMap sentence : sentenceAnnotations) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                Word w = wordIndex.get(token.get(CharacterOffsetBeginAnnotation.class));
                String ner = token.get(NamedEntityTagAnnotation.class);
                String nerValue = token.get(NormalizedNamedEntityTagAnnotation.class);
                if (ner != null)
                    w.putAnnotation("nerLabel", ner);
                if (nerValue != null)
                    w.putAnnotation("nerValue", nerValue);
                // System.out.println(w.getAnnotations());
            }
        }
    }
}
Example 13
Source File: Entity.java From NLIWOD with GNU Affero General Public License v3.0
/**
 * Checks if there is an entity of the specified type in the question.
 * @param entityType an entity type: Date, Location, Organization, Person, Percent, or Money
 * @param question the question to search
 * @return the name of the entity type if an entity of that type is present, otherwise "No" + the name of the type
 */
protected String recognizeEntity(String entityType, String question) {
    String result = "No" + entityType;
    Annotation annotation = new Annotation(question);
    pipeline.annotate(annotation);
    List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String ne = token.get(NamedEntityTagAnnotation.class);
            if (entityType.toUpperCase().equals(ne)) {
                result = entityType;
            }
        }
    }
    return result;
}
Example 14
Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License
public Word[] getTaggedWords(String sentence) {
    CoreMap taggedSentence = getPOS(sentence);
    Word[] ret = new Word[taggedSentence.get(TokensAnnotation.class).size()];
    int count = 0;
    for (CoreLabel token : taggedSentence.get(TokensAnnotation.class)) {
        // this is the text of the token
        String word = token.get(TextAnnotation.class);
        // this is the POS tag of the token
        String pos = token.get(PartOfSpeechAnnotation.class);
        //System.out.println(word+"["+pos+"]");
        ret[count] = new Word(getBaseFormOfPattern(word.toLowerCase()), word, pos, count + 1);
        count++;
    }
    return ret;
}
Example 15
Source File: SentimentAnalyzer.java From hazelcast-jet-demos with Apache License 2.0
private double getOverallSentiment(List<CoreMap> sentences) {
    double sum = 0;
    int numberOfSentences = 0;
    for (CoreMap sentence : sentences) {
        Tree sentiments = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        int predictedClass = RNNCoreAnnotations.getPredictedClass(sentiments);
        if (predictedClass == 2) { // neutral sentiment
            continue;
        }
        sum += predictedClass;
        numberOfSentences++;
    }
    // map the average class (0..4) onto the range [-1, 1]
    return numberOfSentences == 0 ? 0 : (sum / numberOfSentences - 2) / 2;
}
Example 16
Source File: JsonPipeline.java From tac2015-event-detection with GNU General Public License v3.0
@SuppressWarnings({ "rawtypes", "unchecked" }) static void addTokenAnno(Map<String,Object> sent_info, CoreMap sentence, String keyname, Class annoClass) { List<String> tokenAnnos = Lists.newArrayList(); for (CoreLabel token: sentence.get(TokensAnnotation.class)) { tokenAnnos.add(token.getString(annoClass)); } sent_info.put(keyname, (Object) tokenAnnos); }
Example 17
Source File: JsonPipeline.java From tac2015-event-detection with GNU General Public License v3.0
@SuppressWarnings("rawtypes") static void addDepsCC(Map<String,Object> sent_info, CoreMap sentence) { SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); List deps = jsonFriendlyDeps(dependencies); sent_info.put("deps_cc", deps); }
Example 18
Source File: Simplifier.java From tint with GNU General Public License v3.0
public static void main(String[] args) {
    // successive reassignments: only the last sentence is actually processed
    String sentenceText;
    sentenceText = "Per gli interventi di seguito descritti, la cui autorizzazione può risultare di competenza dei Comuni o delle CTC in relazione alla tipologia ed alla localizzazione dell'intervento, si indicano i seguenti elaborati, precisando che essi sono orientativi e che comunque devono mostrare chiaramente dove si interviene e come si interviene.";
    sentenceText = "Il mondo, precisando che si tratta della Terra, è molto bello.";
    sentenceText = "In particolare, andranno rilevati e descritti tutti gli elementi di criticità paesaggistica, insiti nel progetto, e andranno messi in relazione a quanto è stato operato, per eliminare o mitigare tali criticità (impatti), garantendo così un migliore inserimento paesaggistico dell'intervento.";
    sentenceText = "In funzione della tipologia dell'opera, oggetto di richiesta di autorizzazione, sono previste due forme diverse di relazione paesaggistica, denominate rispettivamente:";
    sentenceText = "Sebbene non sappia l'inglese, si è fatto capire dai turisti.";
    // sentenceText = "Io cancello il gesso dalla lavagna.";

    try {
        TintPipeline pipeline = new TintPipeline();
        pipeline.loadDefaultProperties();
        pipeline.setProperty("annotators", "ita_toksent, udpipe, ita_morpho, ita_lemma, ita_comp_morpho");
        pipeline.setProperty("customAnnotatorClass.udpipe", "eu.fbk.dh.fcw.udpipe.api.UDPipeAnnotator");
        pipeline.setProperty("customAnnotatorClass.ita_comp_morpho",
                "eu.fbk.dh.tint.digimorph.annotator.DigiCompMorphAnnotator");
        pipeline.setProperty("udpipe.server", "gardner");
        pipeline.setProperty("udpipe.port", "50020");
        pipeline.setProperty("udpipe.keepOriginal", "1");
        pipeline.load();

        Annotation annotation = pipeline.runRaw(sentenceText);
        System.out.println(JSONOutputter.jsonPrint(annotation));

        Map<Integer, HashMultimap<Integer, Integer>> children = new HashMap<>();
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (int sentIndex = 0; sentIndex < sentences.size(); sentIndex++) {
            CoreMap sentence = sentences.get(sentIndex);
            children.put(sentIndex, HashMultimap.create());
            SemanticGraph semanticGraph = sentence
                    .get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            Collection<IndexedWord> rootNodes = semanticGraph.getRoots();
            if (rootNodes.isEmpty()) {
                continue;
            }
            for (IndexedWord root : rootNodes) {
                Set<Integer> stack = new HashSet<>();
                Set<IndexedWord> used = new HashSet<>();
                addChildren(children.get(sentIndex), stack, root, semanticGraph, used);
            }
        }

        SimplificationRule rule;
        String output;
        rule = new ReplaceSubordinateRule();
        output = rule.apply(annotation, children);
        System.out.println(output);

        // rule = new DenominatiSplittingRule();
        // output = rule.apply(annotation, children);
        // System.out.println(output);
        //
        // rule = new GarantendoSplittingRule();
        // output = rule.apply(annotation, children);
        // System.out.println(output);
        //
        // rule = new GarantendoSplittingRule();
        // output = rule.apply(annotation, children);
        // System.out.println(output);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 19
Source File: CoreNLPSentimentAnnotator.java From Heracles with GNU General Public License v3.0
@Override
public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit) {
    Properties prop1 = new Properties();
    prop1.setProperty("annotators", "parse sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);

    for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)) {
        HashMap<Integer, Word> wordIndex = new HashMap<>();
        Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex, true);
        pipeline.annotate(a);
        for (CoreMap sentence : a.get(SentencesAnnotation.class)) {
            Tree sentimentTree = sentence.get(SentimentAnnotatedTree.class);
            sentimentTree.setSpans();
            sentimentTree.indexLeaves();
            sentimentTree.indexSpans();
            sentimentTree.percolateHeadIndices();
            // for (CoreLabel cl : sentimentTree.taggedLabeledYield()) {
            //     Main.debug("" + cl.beginPosition() + "\t" + cl.get(CharacterOffsetBeginAnnotation.class));
            //     Main.debug(cl.index() + "\t" + cl.keySet());
            // }
            // sentimentTree.indentedListPrint();
            // sentence.get(TreeAnnotation.class).indentedListPrint();

            SimpleMatrix sm = RNNCoreAnnotations.getPredictions(sentimentTree);
            assignSentiment(span, sm, "phraseSentiment");
            // Main.debug(sm.toString());

            // assign begin positions to each word in the tree because those seem to be missing
            // int order = 0;
            // ArrayList<edu.stanford.nlp.ling.Word> stanfordWords = sentimentTree.yieldWords();
            // for (Word w : span) {
            //     stanfordWords.get(order).setBeginPosition(w.getStartOffset());
            //     order++;
            // }

            try {
                analyzeTree(sentimentTree, span, wordIndex, 0);
            } catch (IllegalSpanException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }
}
Example 20
Source File: WiseOwlStanfordFilter.java From wiseowl with MIT License
public Iterator findTokens() throws IOException {
    /*
    char[] c = new char[256];
    int sz = 0;
    StringBuilder b = new StringBuilder();
    while ((sz = input.read(c)) >= 0) {
        b.append(c, 0, sz);
    }
    */
    //String text = b.toString();
    if (!input.incrementToken())
        return null;
    String text;
    text = input.getAttribute(CharTermAttribute.class).toString();
    // read some text in the text variable
    //System.out.println("before annotation");
    Annotation document = new Annotation(text);
    // these are all the sentences in this document;
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    pipeline.annotate(document);

    // collect the temporal expressions with their character offsets
    List<CoreMap> timexAnnsAll = document.get(TimeAnnotations.TimexAnnotations.class);
    for (CoreMap cm : timexAnnsAll) {
        List<CoreLabel> tokens = cm.get(CoreAnnotations.TokensAnnotation.class);
        TimeData td = new TimeData();
        td.setTime(cm.get(TimeExpression.Annotation.class).getTemporal().toString());
        td.setStart(tokens.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
        td.setEnd(tokens.get(tokens.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        timeQueue.add(td);
    }

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    //System.out.println("after annotation and sentence getting" + sentences.size());
    for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence;
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            System.out.println("in token");
            // this is the text of the token
            String word = token.get(TextAnnotation.class);
            // this is the POS tag of the token
            String pos = token.get(PartOfSpeechAnnotation.class);
            // this is the NER label of the token
            String ne = token.get(NamedEntityTagAnnotation.class);
            // System.out.println("word : " + word + " pos: " + pos + " ner: " + ne);
            TokenData tok = new TokenData();
            tok.setNER(ne);
            tok.setToken(word);
            tok.setPOS(pos);
            tokenQueue.add(tok);
        }
    }
    Iterator<TokenData> it = tokenQueue.iterator();
    itr_cpy = tokenQueue.iterator();
    tokenOffset = 0;
    start = 0;
    end = 0;
    return it;
}