edu.stanford.nlp.ling.TaggedWord Java Examples
The following examples show how to use
edu.stanford.nlp.ling.TaggedWord.
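For orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic TaggedWord API: a MaxentTagger turns each tokenized sentence into a List of TaggedWord, and each TaggedWord exposes word() and tag(). The model path is a placeholder; substitute any POS tagger model you have available.

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.StringReader;
import java.util.List;

public class TaggedWordSketch {
    public static void main(String[] args) {
        // Placeholder model path -- point this at any MaxentTagger model on disk.
        MaxentTagger tagger = new MaxentTagger("models/english-left3words-distsim.tagger");
        List<List<HasWord>> sentences =
                MaxentTagger.tokenizeText(new StringReader("Stanford CoreNLP tags words."));
        for (List<HasWord> sentence : sentences) {
            for (TaggedWord tw : tagger.tagSentence(sentence)) {
                // Each TaggedWord pairs a token with its part-of-speech tag.
                System.out.println(tw.word() + "/" + tw.tag());
            }
        }
    }
}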
Example #1
Source File: Main.java From dependensee with GNU General Public License v2.0
private static Graph getGraph(Tree tree, Collection<TypedDependency> tdl) throws Exception {
    ArrayList<TaggedWord> words = tree.taggedYield();
    Graph g = new Graph(words);
    for (TypedDependency td : tdl) {
        g.addEdge(td.gov().index() - 1, td.dep().index() - 1, td.reln().toString());
    }
    try {
        g.setRoot(GrammaticalStructure.getRoots(tdl).iterator().next().gov().toString());
    } catch (Exception ex) {
        //System.err.println("Cannot find dependency graph root. Setting root to first");
        if (g.nodes.size() > 0) {
            g.setRoot(g.nodes.get(0).label);
        }
    }
    return g;
}
Example #2
Source File: CorenlpPipeline.java From datashare with GNU Affero General Public License v3.0
/**
 * Part-of-Speech Classification (Maximum entropy) only
 *
 * @param input    the string to annotate
 * @param hash     the input hash code
 * @param language the input language
 */
private Annotations processPosClassifier(String input, String hash, Language language) throws InterruptedException {
    Annotations annotations = new Annotations(hash, getType(), language);
    LOGGER.info("POS-tagging for " + language.toString());

    // Split input into sentences
    final CoreNlpAnnotator<MaxentTagger> nlpAnnotator;
    nlpAnnotator = CoreNlpPosModels.getInstance().get(language);
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(input));
    for (List<HasWord> sentence : sentences) {
        // NlpTag with parts-of-speech
        List<TaggedWord> taggedSentence = nlpAnnotator.annotator.tagSentence(sentence);
        // Feed annotation
        for (TaggedWord word : taggedSentence) {
            int begin = word.beginPosition();
            int end = word.endPosition();
            String pos = word.tag(); // like line 157 we don't use POS tagging
            annotations.add(POS, begin, end);
        }
    }
    return annotations;
}
Example #3
Source File: DependencyParser.java From JHazm with MIT License
public ConcurrentDependencyGraph rawParse(List<TaggedWord> sentence) throws IOException, MaltChainedException {
    String[] conll = new String[sentence.size()];
    for (int i = 0; i < sentence.size(); i++) {
        TaggedWord taggedWord = sentence.get(i);
        String word = taggedWord.word();
        String Lemma = "_";
        if (this.lemmatizer != null)
            Lemma = this.lemmatizer.lemmatize(word);
        String pos = taggedWord.tag();
        conll[i] = String.format("%s\t%s\t%s\t%s\t%s\t%s", i + 1, word, Lemma, pos, pos, "_");
    }
    return parse(conll);
}
Example #4
Source File: PeykareReader.java From JHazm with MIT License
/**
 * Join verb parts like Dadegan corpus.
 * Input:
 *   دیده/ADJ_INO
 *   شد/V_PA
 * Output:
 *   دیده شد/V_PA
 */
public static List<TaggedWord> joinVerbParts(List<TaggedWord> sentence) {
    Collections.reverse(sentence);
    List<TaggedWord> result = new ArrayList<>();
    TaggedWord beforeTaggedWord = new TaggedWord("", "");
    for (TaggedWord taggedWord : sentence) {
        if (PeykareReader.tokenizer.getBeforeVerbs().contains(taggedWord.word()) ||
                (PeykareReader.tokenizer.getAfterVerbs().contains(beforeTaggedWord.word()) &&
                 PeykareReader.tokenizer.getVerbs().contains(taggedWord.word()))) {
            beforeTaggedWord.setWord(taggedWord.word() + " " + beforeTaggedWord.word());
            if (result.isEmpty())
                result.add(beforeTaggedWord);
        } else {
            result.add(taggedWord);
            beforeTaggedWord = taggedWord;
        }
    }
    Collections.reverse(result);
    return result;
}
Example #5
Source File: BijankhanReaderTest.java From JHazm with MIT License
@Test
public void posMapTest() throws IOException {
    BijankhanReader reader = new BijankhanReader(false);

    List<TaggedWord> expected = new ArrayList<>();
    expected.add(new TaggedWord("اولین", "ADJ"));
    expected.add(new TaggedWord("سیاره", "N"));
    expected.add(new TaggedWord("خارج", "ADJ"));
    expected.add(new TaggedWord("از", "PREP"));
    expected.add(new TaggedWord("منظومه", "N"));
    expected.add(new TaggedWord("شمسی", "ADJ"));
    expected.add(new TaggedWord("دیده", "ADJ"));
    expected.add(new TaggedWord("شد", "V"));
    expected.add(new TaggedWord(".", "PUNC"));

    Iterator<List<TaggedWord>> iter = reader.getSentences().iterator();
    List<TaggedWord> actual = iter.next();
    assertEquals("Failed to map pos of sentence", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        TaggedWord actualTaggedWord = actual.get(i);
        TaggedWord expectedTaggedWord = expected.get(i);
        if (!actualTaggedWord.tag().equals(expectedTaggedWord.tag()))
            assertEquals("Failed to map pos of sentence", expectedTaggedWord, actualTaggedWord);
    }
}
Example #6
Source File: BijankhanReaderTest.java From JHazm with MIT License
@Test
public void joinVerbPartsTest() throws IOException {
    BijankhanReader reader = new BijankhanReader(null);

    List<TaggedWord> expected = new ArrayList<>();
    expected.add(new TaggedWord("اولین", "ADJ_SUP"));
    expected.add(new TaggedWord("سیاره", "N_SING"));
    expected.add(new TaggedWord("خارج", "ADJ_SIM"));
    expected.add(new TaggedWord("از", "P"));
    expected.add(new TaggedWord("منظومه", "N_SING"));
    expected.add(new TaggedWord("شمسی", "ADJ_SIM"));
    expected.add(new TaggedWord("دیده شد", "V_PA"));
    expected.add(new TaggedWord(".", "DELM"));

    Iterator<List<TaggedWord>> iter = reader.getSentences().iterator();
    List<TaggedWord> actual = iter.next();
    assertEquals("Failed to join verb parts of sentence", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        TaggedWord actualTaggedWord = actual.get(i);
        TaggedWord expectedTaggedWord = expected.get(i);
        if (!actualTaggedWord.tag().equals(expectedTaggedWord.tag()))
            assertEquals("Failed to join verb parts of sentence", expectedTaggedWord, actualTaggedWord);
    }
}
Example #7
Source File: BijankhanReaderTest.java From JHazm with MIT License
@Test
public void posMapJoinVerbPartsTest() throws IOException {
    BijankhanReader reader = new BijankhanReader();

    List<TaggedWord> expected = new ArrayList<>();
    expected.add(new TaggedWord("اولین", "ADJ"));
    expected.add(new TaggedWord("سیاره", "N"));
    expected.add(new TaggedWord("خارج", "ADJ"));
    expected.add(new TaggedWord("از", "PREP"));
    expected.add(new TaggedWord("منظومه", "N"));
    expected.add(new TaggedWord("شمسی", "ADJ"));
    expected.add(new TaggedWord("دیده شد", "V"));
    expected.add(new TaggedWord(".", "PUNC"));

    Iterator<List<TaggedWord>> iter = reader.getSentences().iterator();
    List<TaggedWord> actual = iter.next();
    assertEquals("Failed to map pos and join verb parts of sentence", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        TaggedWord actualTaggedWord = actual.get(i);
        TaggedWord expectedTaggedWord = expected.get(i);
        if (!actualTaggedWord.tag().equals(expectedTaggedWord.tag()))
            assertEquals("Failed to map pos and join verb parts of sentence", expectedTaggedWord, actualTaggedWord);
    }
}
Example #8
Source File: Parser.java From jatecs with GNU General Public License v3.0
private Vector<String> C(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    normalization_V = new int[2];
    normalization_NF = new int[2];
    normalization_ADJF = new Vector<int[]>();
    int startIndex = index.get();
    normalization_V[0] = startIndex;
    if (match(sentence, index, "V")) {
        normalization_V[1] = index.get();
        if (ATTF(sentence, index)) {
            normalization_NF[0] = index.get();
            if (NF(sentence, index)) {
                normalization_NF[1] = index.get();
                return TaggedWordsArrayToString(sentence, "C");
            }
        }
    }
    index.set(startIndex);
    return null;
}
Example #9
Source File: POSTaggerTest.java From JHazm with MIT License
@Test
public void batchTagTest() throws IOException {
    POSTagger tagger = new POSTagger();
    String[] input = new String[] { "من", "به", "مدرسه", "رفته بودم", "." };

    List<TaggedWord> expected = new ArrayList<>();
    expected.add(new TaggedWord("من", "PR"));
    expected.add(new TaggedWord("به", "PREP"));
    expected.add(new TaggedWord("مدرسه", "N"));
    expected.add(new TaggedWord("رفته بودم", "V"));
    expected.add(new TaggedWord(".", "PUNC"));

    List<TaggedWord> actual = tagger.batchTag(Arrays.asList(input));
    assertEquals("Failed to tagged words of '" + StringUtils.join(input, " ") + "' sentence", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        TaggedWord actualTaggedWord = actual.get(i);
        TaggedWord expectedTaggedWord = expected.get(i);
        if (!actualTaggedWord.tag().equals(expectedTaggedWord.tag()))
            assertEquals("Failed to tagged words of '" + StringUtils.join(input, " ") + "' sentence", expectedTaggedWord, actualTaggedWord);
    }
}
Example #10
Source File: SentenceProcessor.java From ADW with GNU General Public License v3.0
public List<WordLemmaTag> processSentence(String sentence, boolean isTokenized) {
    final StanfordLemmatizer lemmatizer = StanfordLemmatizer.getInstance();
    final StanfordPOSTagger tagger = StanfordPOSTagger.getInstance();

    final List<WordLemmaTag> tlSentence = new ArrayList<WordLemmaTag>();

    // the tagged sentence
    List<TaggedWord> tSentence = null;
    if (isTokenized)
        tSentence = tagger.tag(sentence);
    else {
        StanfordTokenizer tokenizer = StanfordTokenizer.getInstance();
        List<Word> tokens = tokenizer.tokenize(sentence);
        tSentence = tagger.tag(tokens);
    }

    // add to the lemmatized sentence
    for (TaggedWord tw : tSentence)
        tlSentence.add(lemmatizer.lemmatize(tw));

    return tlSentence;
}
Example #11
Source File: Parser.java From jatecs with GNU General Public License v3.0
private Vector<String> parse(ArrayList<TaggedWord> taggedWords) {
    Vector<ArrayList<TaggedWord>> sentences = findAppositives(taggedWords);
    Vector<String> patterns = new Vector<String>();
    for (Iterator<ArrayList<TaggedWord>> iterator = sentences.iterator(); iterator.hasNext(); ) {
        Vector<String> pattern = null;
        taggedWords = (ArrayList<TaggedWord>) iterator.next();
        IntegerMangi index = new IntegerMangi(0);
        while (index.get() < taggedWords.size()) {
            pattern = A(taggedWords, index);
            if (pattern == null)
                pattern = B(taggedWords, index);
            if (pattern == null)
                pattern = C(taggedWords, index);
            if (pattern != null)
                patterns.addAll(pattern);
            else
                index.incr();
        }
    }
    return patterns;
}
Example #12
Source File: ParserAnnotatorUtils.java From Heracles with GNU General Public License v3.0
/**
 * Set the tags of the original tokens and the leaves if they
 * aren't already set.
 */
public static void setMissingTags(CoreMap sentence, Tree tree) {
    List<TaggedWord> taggedWords = null;
    List<Label> leaves = null;
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (int i = 0, size = tokens.size(); i < size; ++i) {
        CoreLabel token = tokens.get(i);
        if (token.tag() == null) {
            if (taggedWords == null) {
                taggedWords = tree.taggedYield();
            }
            if (leaves == null) {
                leaves = tree.yield();
            }
            token.setTag(taggedWords.get(i).tag());
            Label leaf = leaves.get(i);
            if (leaf instanceof HasTag) {
                ((HasTag) leaf).setTag(taggedWords.get(i).tag());
            }
        }
    }
}
Example #13
Source File: DocumentFrequencyCounter.java From wiseowl with MIT License
/**
 * Get an IDF map for the given document string.
 *
 * @param document
 * @return
 */
private static Counter<String> getIDFMapForDocument(String document) {
    // Clean up -- remove some Gigaword patterns that slow things down
    // / don't help anything
    document = headingSeparator.matcher(document).replaceAll("");

    DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(document));
    preprocessor.setTokenizerFactory(tokenizerFactory);

    Counter<String> idfMap = new ClassicCounter<String>();
    for (List<HasWord> sentence : preprocessor) {
        if (sentence.size() > MAX_SENTENCE_LENGTH)
            continue;

        List<TaggedWord> tagged = tagger.tagSentence(sentence);

        for (TaggedWord w : tagged) {
            if (w.tag().startsWith("n"))
                idfMap.incrementCount(w.word());
        }
    }
    return idfMap;
}
Example #14
Source File: Main.java From dependensee with GNU General Public License v2.0
private static Graph getGraph(Tree tree) throws Exception {
    ArrayList<TaggedWord> words = tree.taggedYield();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependencies();
    Graph g = new Graph(words);
    for (TypedDependency td : tdl) {
        g.addEdge(td.gov().index() - 1, td.dep().index() - 1, td.reln().toString());
    }
    try {
        g.setRoot(GrammaticalStructure.getRoots(tdl).iterator().next().gov().toString());
    } catch (Exception ex) {
        //System.err.println("Cannot find dependency graph root. Setting root to first");
        if (g.nodes.size() > 0) {
            g.setRoot(g.nodes.get(0).label);
        }
    }
    return g;
}
Example #15
Source File: StanfordPOSTagger.java From ADW with GNU General Public License v3.0
public List<TaggedWord> tag(List<? extends HasWord> sentence) {
    if (sentence == null || sentence.size() == 0)
        return new ArrayList<TaggedWord>();
    return tagger.tagSentence(sentence);
}
Example #16
Source File: PeykareReaderTest.java From JHazm with MIT License
@Test
public void joinVerbPartsTest() {
    List<TaggedWord> input = Arrays.asList(new TaggedWord[] {
        new TaggedWord("اولین", "AJ"),
        new TaggedWord("سیاره", "Ne"),
        new TaggedWord("خارج", "AJ"),
        new TaggedWord("از", "P"),
        new TaggedWord("منظومه", "Ne"),
        new TaggedWord("شمسی", "AJ"),
        new TaggedWord("دیده", "AJ"),
        new TaggedWord("شد", "V"),
        new TaggedWord(".", "PUNC")
    });
    List<TaggedWord> expected = Arrays.asList(new TaggedWord[] {
        new TaggedWord("اولین", "AJ"),
        new TaggedWord("سیاره", "Ne"),
        new TaggedWord("خارج", "AJ"),
        new TaggedWord("از", "P"),
        new TaggedWord("منظومه", "Ne"),
        new TaggedWord("شمسی", "AJ"),
        new TaggedWord("دیده شد", "V"),
        new TaggedWord(".", "PUNC")
    });

    List<TaggedWord> actual = PeykareReader.joinVerbParts(input);
    assertEquals("Failed to join verb parts of sentence", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
        TaggedWord actualTaggedWord = actual.get(i);
        TaggedWord expectedTaggedWord = expected.get(i);
        if (!actualTaggedWord.tag().equals(expectedTaggedWord.tag()))
            assertEquals("Failed to join verb parts of sentence", expectedTaggedWord, actualTaggedWord);
    }
}
Example #17
Source File: POSTagger.java From JHazm with MIT License
public List<TaggedWord> batchTag(List<String> sentence) {
    String[] sen = new String[sentence.size()];
    for (int i = 0; i < sentence.size(); i++)
        sen[i] = sentence.get(i).replace(" ", "_");
    List newSent = Sentence.toWordList(sen);
    List taggedSentence = this.tagger.tagSentence(newSent);

    List<TaggedWord> taggedSen = new ArrayList<>();
    for (int i = 0; i < taggedSentence.size(); i++) {
        TaggedWord tw = (TaggedWord) taggedSentence.get(i);
        tw.setWord(sentence.get(i));
        taggedSen.add(tw);
    }
    return taggedSen;
}
Example #18
Source File: POSTagger.java From JHazm with MIT License
public List<List<TaggedWord>> batchTags(List<List<String>> sentences) {
    List<List<TaggedWord>> result = new ArrayList<>();
    for (List<String> sentence : sentences) {
        result.add(batchTag(sentence));
    }
    return result;
}
Example #19
Source File: Graph.java From dependensee with GNU General Public License v2.0
Graph(ArrayList<TaggedWord> t) {
    this();
    int i = 1;
    for (TaggedWord taggedWord : t) {
        addNode(taggedWord.word() + "-" + (i++), taggedWord.tag());
    }
}
Example #20
Source File: ParseTree.java From NLIDB with Apache License 2.0
/**
 * Construct a parse tree using the stanford NLP parser. Only one sentence.
 * Here we are omitting the information of dependency labels (tags).
 * @param text input text.
 */
public ParseTree(String text, NLParser parser) {
    // pre-processing the input text
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    List<HasWord> sentence = null;
    for (List<HasWord> sentenceHasWord : tokenizer) {
        sentence = sentenceHasWord;
        break;
    }
    // part-of-speech tagging
    List<TaggedWord> tagged = parser.tagger.tagSentence(sentence);
    // dependency syntax parsing
    GrammaticalStructure gs = parser.parser.predict(tagged);

    // Reading the parsed sentence into ParseTree
    int N = sentence.size() + 1;
    Node[] nodes = new Node[N];
    root = new Node(0, "ROOT", "ROOT");
    nodes[0] = root;
    for (int i = 0; i < N - 1; i++) {
        nodes[i + 1] = new Node(i + 1, sentence.get(i).word(), tagged.get(i).tag());
    }
    for (TypedDependency typedDep : gs.allTypedDependencies()) {
        int from = typedDep.gov().index();
        int to = typedDep.dep().index();
        // String label = typedDep.reln().getShortName(); // omitting the label
        nodes[to].parent = nodes[from];
        nodes[from].children.add(nodes[to]);
    }
}
Example #21
Source File: ParserDemo.java From NLIDB with Apache License 2.0
public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    for (int argIndex = 0; argIndex < args.length;) {
        switch (args[argIndex]) {
            case "-tagger":
                taggerPath = args[argIndex + 1];
                argIndex += 2;
                break;
            case "-com.dukenlidb.nlidb.model":
                modelPath = args[argIndex + 1];
                argIndex += 2;
                break;
            default:
                throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }

    String text = "Return authors who have more papers than Bob in VLDB after 2000";

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);

        // Print typed dependencies
        log.info(gs);
    }
}
Example #22
Source File: StanfordPOSTagger.java From jatecs with GNU General Public License v3.0
public Vector<ArrayList<TaggedWord>> tag(String input) {
    Vector<ArrayList<TaggedWord>> returnVector = new Vector<ArrayList<TaggedWord>>();
    List<List<HasWord>> sentences = MaxentTagger
            .tokenizeText(new BufferedReader(new StringReader(input)));
    for (List<? extends HasWord> sentence : sentences) {
        returnVector.add(tagger.tagSentence(sentence));
    }
    return returnVector;
}
Example #23
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingStanfordMaxentPOS() {
    try {
        MaxentTagger tagger = new MaxentTagger(getModelDir() + "//wsj-0-18-bidirectional-distsim.tagger");
        // MaxentTagger tagger = new MaxentTagger(getModelDir() + "//gate-EN-twitter.model");
        // System.out.println(tagger.tagString("AFAIK she H8 cth!"));
        // System.out.println(tagger.tagString("BTW had a GR8 tym at the party BBIAM."));

        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("sentences.txt")));
        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> taggedSentence = tagger.tagSentence(sentence);

            // Simple display
            System.out.println("---" + taggedSentence);

            // Simple conversion to String
            // System.out.println(Sentence.listToString(taggedSentence, false));

            // Display of words and tags
            // for (TaggedWord taggedWord : taggedSentence) {
            //     System.out.print(taggedWord.word() + "/" + taggedWord.tag() + " ");
            // }
            // System.out.println();

            // List of specific tags
            // System.out.print("NN Tagged: ");
            // for (TaggedWord taggedWord : taggedSentence) {
            //     if (taggedWord.tag().startsWith("NN")) {
            //         System.out.print(taggedWord.word() + " ");
            //     }
            // }
            // System.out.println();
        }
    } catch (FileNotFoundException ex) {
        ex.printStackTrace();
    }
}
Example #24
Source File: CoreNLP.java From Criteria2Query with Apache License 2.0
public ArrayList<TaggedWord> tagWords(Tree t) {
    ArrayList<TaggedWord> twlist = t.taggedYield();
    for (int x = 0; x < twlist.size(); x++) {
        TaggedWord tw = twlist.get(x);
        System.out.println("[" + (x) + "]:" + tw.tag() + "--" + tw.word() + " (" + tw.value() + ")"
                + "--" + tw.beginPosition() + "--" + tw.endPosition() + "-");
        //System.out.print(tw.word() + "/" + tw.tag() + " ");
    }
    return twlist;
}
Example #25
Source File: Parser.java From jatecs with GNU General Public License v3.0
/************************************************************
 * Parse a single review and return the patterns extracted.
 *
 * @param review the review from which to extract the patterns
 * @return the patterns extracted
 ************************************************************/
public Vector<String> extract(String review) {
    extractedPatterns = new Vector<String>();
    Vector<ArrayList<TaggedWord>> taggedSentences = new Vector<ArrayList<TaggedWord>>();
    taggedSentences = tagger.tag(review);
    for (ArrayList<TaggedWord> tSentence : taggedSentences)
        extractedPatterns.addAll(parse(tSentence));
    return extractedPatterns;
}
Example #26
Source File: Parser.java From jatecs with GNU General Public License v3.0
private Vector<String> A(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    normalization_NF = new int[2];
    normalization_ADJF = new Vector<int[]>();
    int startIndex = index.get();
    if (ATTF(sentence, index)) {
        normalization_NF[0] = index.get();
        if (NF(sentence, index)) {
            normalization_NF[1] = index.get();
            return TaggedWordsArrayToString(sentence, "A");
        }
    }
    index.set(startIndex);
    return null;
}
Example #27
Source File: Parser.java From jatecs with GNU General Public License v3.0
private Vector<String> B(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    normalization_NF = new int[2];
    normalization_ADJF = new Vector<int[]>();
    int startIndex = index.get();
    normalization_NF[0] = startIndex;
    if (NF(sentence, index)) {
        normalization_NF[1] = index.get();
        if (match(sentence, index, "V"))
            if (ATTF(sentence, index))
                return TaggedWordsArrayToString(sentence, "B");
    }
    index.set(startIndex);
    return null;
}
Example #28
Source File: Parser.java From jatecs with GNU General Public License v3.0
private boolean ATTF(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    int[] temp = new int[2];
    temp[0] = index.get();
    if (ADJF(sentence, index)) {
        temp[1] = index.get();
        normalization_ADJF.add(temp);
        return ATTF1(sentence, index);
    }
    return false;
}
Example #29
Source File: Parser.java From jatecs with GNU General Public License v3.0
private boolean NF(ArrayList<TaggedWord> sentence, IntegerMangi index) {
    if (match(sentence, index, "DT"))
        return NF(sentence, index);
    if (match(sentence, index, "NN"))
        return NF1(sentence, index);
    if (match(sentence, index, "JJ"))
        return match(sentence, index, "NN");
    return false;
}
Example #30
Source File: Parser.java From jatecs with GNU General Public License v3.0
private boolean match(ArrayList<TaggedWord> sentence, IntegerMangi index, String tag) {
    if (index.get() < sentence.size() && sentence.get(index.get()).tag().startsWith(tag)) {
        index.incr();
        return true;
    }
    return false;
}