edu.stanford.nlp.process.CoreLabelTokenFactory Java Examples
The following examples show how to use edu.stanford.nlp.process.CoreLabelTokenFactory.
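In its simplest use, a CoreLabelTokenFactory is handed to a PTBTokenizer so that each token comes back as a CoreLabel carrying its text and character offsets. The minimal sketch below illustrates that pattern before the project examples; the class name, sample sentence, and the "invertible=true" option are illustrative assumptions rather than code taken from any of the projects listed here.

import java.io.StringReader;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;

public class CoreLabelTokenFactoryDemo {
    public static void main(String[] args) {
        String text = "CoreLabelTokenFactory builds CoreLabel tokens.";
        // "invertible=true" keeps the original text and offsets on each token.
        PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<>(
                new StringReader(text), new CoreLabelTokenFactory(), "invertible=true");
        while (tokenizer.hasNext()) {
            CoreLabel token = tokenizer.next();
            System.out.println(token.word() + " [" + token.beginPosition()
                    + "," + token.endPosition() + ")");
        }
    }
}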
Example #1
Source File: TypeClassifier.java From winter with Apache License 2.0
/**
 * Initializes the tokenizer to detect date columns.
 */
public void initialize() {
    Properties props = new Properties();
    // Tokenize with a PTBTokenizer that produces CoreLabel tokens.
    pipeline.addAnnotator(new TokenizerAnnotator(false) {
        @Override
        public Tokenizer<CoreLabel> getTokenizer(Reader r) {
            return new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), "");
        }
    });
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    pipeline.addAnnotator(new POSTaggerAnnotator(false));
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
}
Example #2
Source File: Main.java From dependensee with GNU General Public License v2.0
public static void writeImage(String sentence, String outFile, int scale) throws Exception {
    LexicalizedParser lp = null;
    try {
        lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    } catch (Exception e) {
        System.err.println("Could not load file englishPCFG.ser.gz. Try placing this file in the same directory as Dependencee.jar");
        return;
    }
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree tree = lp.apply(wordList);
    writeImage(tree, outFile, scale);
}
Example #3
Source File: MainTest.java From dependensee with GNU General Public License v2.0
/**
 * Test of writeImage method, of class Main.
 */
@Test
public void testWriteImage() throws Exception {
    String text = "A quick brown fox jumped over the lazy dog.";
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    LexicalizedParser lp = LexicalizedParser.loadModel();
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
    Tree tree = lp.apply(wordList);
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
    Main.writeImage(tdl, "image.png", 3);
    assert (new File("image.png").exists());
}
Example #4
Source File: TokenizerDemo.java From blog-codes with Apache License 2.0
public static void main(String[] args) throws IOException {
    for (String arg : args) {
        // option #1: By sentence.
        DocumentPreprocessor dp = new DocumentPreprocessor(arg);
        for (List<HasWord> sentence : dp) {
            System.out.println(sentence);
        }
        // option #2: By token
        PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(new FileReader(arg), new CoreLabelTokenFactory(), "");
        while (ptbt.hasNext()) {
            CoreLabel label = ptbt.next();
            System.out.println(label);
        }
    }
}
Example #5
Source File: StanfordLexicalDemo.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
public static void main(String[] args) {
    String parseModel = getResourcePath() + "englishPCFG.ser.gz";
    LexicalizedParser lexicalizedParser = LexicalizedParser.loadModel(parseModel);

    // Parse a sentence that is already tokenized.
    String[] sentenceArray = {"The", "cow", "jumped", "over", "the", "moon", "."};
    List<CoreLabel> words = SentenceUtils.toCoreLabelList(sentenceArray);
    Tree parseTree = lexicalizedParser.apply(words);
    parseTree.pennPrint();
    TreePrint treePrint = new TreePrint("typedDependenciesCollapsed");
    treePrint.printTree(parseTree);

    // Tokenize raw text with CoreLabelTokenFactory, then parse it.
    String sentence = "The cow jumped over the moon.";
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    Tokenizer<CoreLabel> tokenizer = tokenizerFactory.getTokenizer(new StringReader(sentence));
    List<CoreLabel> wordList = tokenizer.tokenize();
    parseTree = lexicalizedParser.apply(wordList);

    TreebankLanguagePack tlp = lexicalizedParser.treebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parseTree);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    for (TypedDependency dependency : tdl) {
        System.out.println("Governor Word: [" + dependency.gov()
                + "] Relation: [" + dependency.reln().getLongName()
                + "] Dependent Word: [" + dependency.dep() + "]");
    }
}
Example #6
Source File: Chapter1.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void stanfordNLPExample() {
    PTBTokenizer ptb = new PTBTokenizer(
            new StringReader("He lives at 1511 W. Randolph."),
            new CoreLabelTokenFactory(), null);
    while (ptb.hasNext()) {
        System.out.println(ptb.next());
    }
}
Example #7
Source File: Main.java From dependensee with GNU General Public License v2.0
public static Graph getGraph(String sentence) throws Exception {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree tree = lp.apply(wordList);
    // gsf is a GrammaticalStructureFactory field of the enclosing class
    // (Example #3 shows how one is created from a TreebankLanguagePack).
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependencies();
    return getGraph(tree, tdl);
}
Example #8
Source File: Main.java From dependensee with GNU General Public License v2.0
public static Graph getGraph(String sentence, LexicalizedParser lp) throws Exception {
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree tree = lp.apply(wordList);
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependencies();
    return getGraph(tree, tdl);
}
Example #9
Source File: Main.java From dependensee with GNU General Public License v2.0
public static void writeImage(String sentence, String outFile, LexicalizedParser lp) throws Exception {
    Tree parse;
    try {
        TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
        parse = lp.apply(wordList);
    } catch (Exception e) {
        throw e;
    }
    writeImage(parse, outFile);
}
Example #10
Source File: ClausIE.java From ambiverse-nlu with Apache License 2.0
/** Initializes the Stanford parser. */
public void initParser() {
    lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    lpq = lp.lexicalizedParserQuery();
}
Example #11
Source File: Chapter2.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingTheStanfordTokenizer() {
    // Using PTBTokenizer
    System.out.println("----PTBTokenizer Example");
    // First example
    // PTBTokenizer ptb = new PTBTokenizer(new StringReader(paragraph),
    //         new CoreLabelTokenFactory(), null);
    // while (ptb.hasNext()) {
    //     System.out.println(ptb.next());
    // }

    // CoreLabel example
    CoreLabelTokenFactory ctf = new CoreLabelTokenFactory();
    PTBTokenizer ptb = new PTBTokenizer(new StringReader(paragraph), ctf, "invertible=true");
    // PTBTokenizer ptb = new PTBTokenizer(new StringReader(paragraph),
    //         new WordTokenFactory(), null);
    while (ptb.hasNext()) {
        CoreLabel cl = (CoreLabel) ptb.next();
        System.out.println(cl.originalText() + " (" + cl.beginPosition() + "-" + cl.endPosition() + ")");
    }

    // Using a DocumentPreprocessor
    System.out.println("----DocumentPreprocessor Example");
    Reader reader = new StringReader(paragraph);
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(reader);
    Iterator<List<HasWord>> it = documentPreprocessor.iterator();
    while (it.hasNext()) {
        List<HasWord> sentence = it.next();
        for (HasWord token : sentence) {
            System.out.println(token);
        }
    }
    // for (List<HasWord> sentence : documentPreprocessor) {
    //     for (HasWord token : sentence) {
    //         System.out.println(token);
    //     }
    // }

    // Using a pipeline
    System.out.println("----pipeline Example");
    Properties properties = new Properties();
    properties.put("annotators", "tokenize, ssplit");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
    Annotation annotation = new Annotation(paragraph);
    pipeline.annotate(annotation);
    pipeline.prettyPrint(annotation, System.out);
}
Example #12
Source File: Chapter7.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingStanfordLexicalizedParser() {
    String parserModel = "C:/Current Books in Progress/NLP and Java/Models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    LexicalizedParser lexicalizedParser = LexicalizedParser.loadModel(parserModel);

    // This option shows parsing a list of correctly tokenized words
    System.out.println("---First option");
    String[] sentenceArray = {"The", "cow", "jumped", "over", "the", "moon", "."};
    List<CoreLabel> words = Sentence.toCoreLabelList(sentenceArray);
    Tree parseTree = lexicalizedParser.apply(words);
    parseTree.pennPrint();
    System.out.println();

    // This option shows loading and using an explicit tokenizer
    System.out.println("---Second option");
    String sentence = "The cow jumped over the moon.";
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    Tokenizer<CoreLabel> tokenizer = tokenizerFactory.getTokenizer(new StringReader(sentence));
    List<CoreLabel> wordList = tokenizer.tokenize();
    parseTree = lexicalizedParser.apply(wordList);

    TreebankLanguagePack tlp = lexicalizedParser.treebankLanguagePack(); // PennTreebankLanguagePack for English
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parseTree);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    for (TypedDependency dependency : tdl) {
        System.out.println("Governor Word: [" + dependency.gov()
                + "] Relation: [" + dependency.reln().getLongName()
                + "] Dependent Word: [" + dependency.dep() + "]");
    }
    System.out.println();

    // You can also use a TreePrint object to print trees and dependencies
    // System.out.println("---Using TreePrint");
    // TreePrint treePrint = new TreePrint("penn,typedDependenciesCollapsed");
    // treePrint.printTree(parseTree);
    // System.out.println("TreePrint Formats");
    // for (String format : TreePrint.outputTreeFormats) {
    //     System.out.println(format);
    // }
    // System.out.println();
}
Example #13
Source File: StanfordParser.java From gAnswer with BSD 3-Clause "New" or "Revised" License
public StanfordParser() {
    // lp, tokenizerFactory, tlp, and gsf are fields of the StanfordParser class.
    lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    tlp = new PennTreebankLanguagePack();
    gsf = tlp.grammaticalStructureFactory();
}
Example #14
Source File: POSTagger.java From sarcasmbot with GNU General Public License v3.0
public POSTagger(String modelFile) {
    this.tagger = new MaxentTagger(modelFile);
    // "untokenizable=noneKeep" keeps untokenizable characters without logging warnings.
    this.ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
}