opennlp.tools.chunker.ChunkerME Java Examples
The following examples show how to use opennlp.tools.chunker.ChunkerME.
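For orientation, here is a minimal, self-contained sketch of the usual call sequence (tokenize, POS-tag, then chunk). It is an illustrative outline rather than code from any of the projects below, and the model paths (models/en-pos-maxent.bin, models/en-chunker.bin) are assumptions that must point at the standard OpenNLP models on your machine.

import java.io.FileInputStream;
import java.io.InputStream;

import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;

public class ChunkerQuickStart {
    public static void main(String[] args) throws Exception {
        // Tokenize the input sentence.
        String[] tokens = SimpleTokenizer.INSTANCE
                .tokenize("The quick brown fox jumps over the lazy dog.");

        // Assumed model locations; adjust to wherever the OpenNLP models live.
        try (InputStream posIn = new FileInputStream("models/en-pos-maxent.bin");
             InputStream chunkIn = new FileInputStream("models/en-chunker.bin")) {

            // POS-tag the tokens; the chunker needs both tokens and tags.
            POSTaggerME tagger = new POSTaggerME(new POSModel(posIn));
            String[] tags = tagger.tag(tokens);

            // Chunk: returns one BIO label (e.g. B-NP, I-NP, O) per token.
            ChunkerME chunker = new ChunkerME(new ChunkerModel(chunkIn));
            String[] chunks = chunker.chunk(tokens, tags);

            for (int i = 0; i < tokens.length; i++) {
                System.out.println(tokens[i] + "\t" + tags[i] + "\t" + chunks[i]);
            }
        }
    }
}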
Example #1
Source File: AnswerTypeEventStream.java From wiseowl with MIT License
public static void main(String[] args) throws IOException {
    if (args.length == 0) {
        System.err.println("Usage: AnswerTypeEventStream eventfile");
        System.exit(1);
    }
    int ai = 0;
    String eventFile = args[ai++];
    String modelsDirProp = System.getProperty("models.dir",
            "book/src/main" + File.separator + "opennlp-models" + File.separator + "english");
    File modelsDir = new File(modelsDirProp);
    File wordnetDir = new File(System.getProperty("wordnet.dir",
            "book/src/main" + File.separator + "WordNet-3.0" + File.separator + "dict"));

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);

    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(wordnetDir);
    EventStream es = new AnswerTypeEventStream(eventFile, actg, parser);
    while (es.hasNext()) {
        System.out.println(es.next().toString());
    }
}
Example #2
Source File: OpenNLP.java From baleen with Apache License 2.0
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    try {
        tokensModel.loadModel(TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
        sentencesModel.loadModel(SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
        posModel.loadModel(POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
        chunkModel.loadModel(ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
    } catch (BaleenException be) {
        getMonitor().error("Unable to load OpenNLP Language Models", be);
        throw new ResourceInitializationException(be);
    }

    try {
        sentenceDetector = new SentenceDetectorME((SentenceModel) sentencesModel.getModel());
        wordTokenizer = new TokenizerME((TokenizerModel) tokensModel.getModel());
        posTagger = new POSTaggerME((POSModel) posModel.getModel());
        phraseChunker = new ChunkerME((ChunkerModel) chunkModel.getModel());
    } catch (Exception e) {
        getMonitor().error("Unable to create OpenNLP taggers", e);
        throw new ResourceInitializationException(e);
    }
}
Example #3
Source File: BasicActions.java From knowledge-extraction with Apache License 2.0
@Test
public void testChunker() {
    try (InputStream modelIn = BasicActions.class.getClassLoader()
            .getResourceAsStream(Consts.EN_CHUNK_MODEL)) {
        String[] tokens = testTokenizer();
        String[] tags = testTagger();
        ChunkerModel chunkerModel = new ChunkerModel(modelIn);
        ChunkerME chunker = new ChunkerME(chunkerModel);
        String[] chunks = chunker.chunk(tokens, tags);
        System.out.println(Arrays.toString(chunks));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example #4
Source File: ChunkerUnitTest.java From tutorials with MIT License
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String[] tags = posTagger.tag(tokens);

    InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    String[] chunks = chunker.chunk(tokens, tags);

    assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP",
            "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
}
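Example #4 consumes the chunker output as raw BIO labels. If you also want to know how confident the model is in each label, ChunkerME keeps the probabilities of the last decoded sequence. The helper below is a small sketch of that pattern; the class and method names are illustrative only, and it assumes a ChunkerME plus tokens and POS tags prepared as in the test above.

import opennlp.tools.chunker.ChunkerME;

final class ChunkConfidence {
    // Print each token with its BIO chunk label and the model's confidence.
    // probs() reports the probabilities of the most recent chunk(...) call,
    // so it must be invoked on the same ChunkerME right after chunking.
    static void printChunksWithConfidence(ChunkerME chunker, String[] tokens, String[] tags) {
        String[] chunks = chunker.chunk(tokens, tags);
        double[] probs = chunker.probs();
        for (int i = 0; i < tokens.length; i++) {
            System.out.printf("%-15s %-6s %-6s %.3f%n", tokens[i], tags[i], chunks[i], probs[i]);
        }
    }
}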
Example #5
Source File: WiseOwlQParserPlugin.java From wiseowl with MIT License
@SuppressWarnings("rawtypes") public void init(NamedList initArgs) { SolrParams params = SolrParams.toSolrParams(initArgs); String modelDirectory = params.get("modelDirectory", System.getProperty("model.dir"));//<co id="qqpp.model"/> String wordnetDirectory = params.get("wordnetDirectory", System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/> if (modelDirectory != null) { File modelsDir = new File(modelDirectory); try { InputStream chunkerStream = new FileInputStream( new File(modelsDir,"en-chunker.bin")); ChunkerModel chunkerModel = new ChunkerModel(chunkerStream); chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/> InputStream posStream = new FileInputStream( new File(modelsDir,"en-pos-maxent.bin")); POSModel posModel = new POSModel(posStream); tagger = new POSTaggerME(posModel); //<co id="qqpp.tagger"/> // model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/> // new File(modelDirectory,"en-answer.bin"))).getMaxentModel(); model = new SuffixSensitiveGISModelReader(new File(modelDirectory+"/qa/ans.bin")).getModel(); //GISModel m = new SuffixSensitiveGISModelReader(new File(modelFileName)).getModel(); probs = new double[model.getNumOutcomes()]; atcg = new AnswerTypeContextGenerator( new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/> } catch (IOException e) { throw new RuntimeException(e); } } }
Example #6
Source File: AnswerTypeClassifier.java From wiseowl with MIT License
/**
 * Train the answer model
 * <p>
 * Hint:
 * <pre>
 * mvn exec:java -Dexec.mainClass=com.tamingtext.qa.AnswerTypeClassifier \
 *   -Dexec.args="dist/data/questions-train.txt en-answer.bin" \
 *   -Dmodel.dir=../../opennlp-models \
 *   -Dwordnet.dir=../../Wordnet-3.0/dict
 * </pre>
 *
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.err.println("Usage: AnswerTypeClassifier <trainFile> <modelFile>");
        System.exit(1);
    }

    String trainFile = args[0];
    File outFile = new File(args[1]);
    String modelsDirProp = System.getProperty("model.dir");
    File modelsDir = new File(modelsDirProp);
    String wordnetDir = System.getProperty("wordnet.dir");

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(new File(wordnetDir));

    //<start id="atc.train"/>
    AnswerTypeEventStream es = new AnswerTypeEventStream(trainFile, actg, parser);
    GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3)); //<co id="atc.train.do"/>
    GISModelWriter writer = new SuffixSensitiveGISModelWriter(model, outFile);
    writer.persist();
    //new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
    /*
    <calloutlist>
        <callout arearefs="atc.train.do"><para>Using the event stream, which feeds us training
        examples, do the actual training using OpenNLP's Maxent classifier.</para></callout>
    </calloutlist>
    */
    //<end id="atc.train"/>
}
Example #7
Source File: FocusNoun.java From wiseowl with MIT License
public static void main(String[] args) throws IOException {
    String wordnetDir = System.getProperty("wordnet.dir");
    //wordnetDir = "WordNet-3.0/dict/";
    String question = "Who is Abraham Lincoln?";
    AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
    String q = null;
    String modelsDirProp = System.getProperty("model.dir");
    //modelsDirProp = "opennlp-models/";
    File modelsDir = new File(modelsDirProp);

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);

    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    String[] context = atcg.getContext(query);
    for (int i = 0; i < context.length; i++) {
        if (context[i].startsWith("hw=") || context[i].startsWith("mw=")) {
            System.out.println(context[i].substring(3));
        }
    }
}
Example #8
Source File: FocusNoun.java From wiseowl with MIT License
public String[] getFocusNoun(String question) throws IOException {
    String wordnetDir = System.getProperty("wordnet.dir");
    wordnetDir = "WordNet-3.0/dict/";
    AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
    String q = null;
    String modelsDirProp = System.getProperty("model.dir");
    modelsDirProp = "opennlp-models/";
    File modelsDir = new File(modelsDirProp);

    InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);

    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    String[] context = atcg.getContext(query);
    String[] focus = new String[2];
    int p = 0;
    for (int i = 0; i < context.length; i++) {
        if (context[i].startsWith("hw=") || context[i].startsWith("mw=")) {
            //System.out.println(context[i].substring(3));
            focus[p++] = context[i].substring(3);
        }
    }
    return focus;
}
Example #9
Source File: OpenNlpChunkerConceptProvider.java From bioasq with Apache License 2.0
@Override
public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
        throws ResourceInitializationException {
    boolean ret = super.initialize(aSpecifier, aAdditionalParams);
    String model = String.class.cast(getParameterValue("chunker-model"));
    try (InputStream ois = getClass().getResourceAsStream(model)) {
        chunker = new ChunkerME(new ChunkerModel(ois));
        Streams.closeQuietly(ois);
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
    type = Arrays.asList(String.class.cast(getParameterValue("type")).split(","));
    minLength = Integer.class.cast(getParameterValue("min-length"));
    return ret;
}
Example #10
Source File: OpenNlpModule.java From SciGraph with Apache License 2.0
@CheckedProvides(ChunkerProvider.class)
ChunkerME getChunker() throws IOException {
    try (InputStream is = getClass().getResourceAsStream("/opennlp/en-chunker.bin")) {
        ChunkerModel model = new ChunkerModel(is);
        return new ChunkerME(model);
    }
}
Example #11
Source File: Chapter5.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingOpenNLPChunker() {
    try (InputStream posModelStream = new FileInputStream(getModelDir() + "\\en-pos-maxent.bin");
         InputStream chunkerStream = new FileInputStream(getModelDir() + "\\en-chunker.bin")) {
        POSModel model = new POSModel(posModelStream);
        POSTaggerME tagger = new POSTaggerME(model);

        // Used to create sample data for trainer
        // for (String sentence : sentences) {
        //     String sen[] = tokenizeSentence(sentence);
        //     String tags[] = tagger.tag(sen);
        //     for (int i = 0; i < tags.length; i++) {
        ////         for (String token : sentence) {
        //         System.out.print(sen[i] + "/" + tags[i] + " ");
        //     }
        //     System.out.println();
        // }
        // System.out.println();

        String[] tags = tagger.tag(sentence);
        for (int i = 0; i < tags.length; i++) {
            // for (String token : sentence) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // chunker
        System.out.println("------------Chunker -----------");
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        ChunkerME chunkerME = new ChunkerME(chunkerModel);
        String[] result = chunkerME.chunk(sentence, tags);
        for (int i = 0; i < result.length; i++) {
            System.out.println("[" + sentence[i] + "] " + result[i]);
        }

        System.out.println("------------Chunker Spans -----------");
        Span[] spans = chunkerME.chunkAsSpans(sentence, tags);
        for (Span span : spans) {
            System.out.print("Type: " + span.getType() + " - " + " Begin: " + span.getStart()
                    + " End:" + span.getEnd() + " Length: " + span.length() + " [");
            for (int j = span.getStart(); j < span.getEnd(); j++) {
                System.out.print(sentence[j] + " ");
            }
            System.out.println("]");
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
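Example #11 walks the chunkAsSpans result manually to print each covered token. When only the surface text of each chunk is needed, Span.spansToStrings can do the slicing instead. The helper below is a brief sketch; the class and method names are illustrative, and it assumes a ChunkerME plus tokens and POS tags prepared as in Example #11.

import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.util.Span;

final class ChunkPhrases {
    // Turn chunkAsSpans output into plain phrase strings (e.g. the noun phrases
    // of a sentence) instead of printing token offsets by hand.
    static void printPhrases(ChunkerME chunker, String[] tokens, String[] tags) {
        Span[] spans = chunker.chunkAsSpans(tokens, tags);
        String[] phrases = Span.spansToStrings(spans, tokens);
        for (int i = 0; i < spans.length; i++) {
            System.out.println(spans[i].getType() + ": " + phrases[i]);
        }
    }
}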
Example #12
Source File: ChunkParser.java From wiseowl with MIT License
public ChunkParser(ChunkerME chunker, POSTaggerME tagger) {
    this.chunker = chunker;
    this.tagger = tagger;
}
Example #13
Source File: NLPChunkerOp.java From lucene-solr with Apache License 2.0
public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
    chunker = new ChunkerME(chunkerModel);
}
Example #14
Source File: OpenNlpModule.java From SciGraph with Apache License 2.0
@Override
ChunkerME get() throws IOException;
Example #15
Source File: ChunkerOpenNLP.java From jate with GNU Lesser General Public License v3.0
public ChunkerOpenNLP(InputStream model) throws IOException {
    chunker = new ChunkerME(new ChunkerModel(model));
}
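All of the examples above load a pre-trained en-chunker.bin. ChunkerME also exposes a static train method for building a custom model from CoNLL-2000-style data (one word, POS tag, and chunk tag per line). The sketch below is an assumed outline against a recent OpenNLP 1.9.x API; the file names train.txt and en-custom-chunker.bin are placeholders.

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.chunker.ChunkSampleStream;
import opennlp.tools.chunker.ChunkerFactory;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

public class ChunkerTrainingSketch {
    public static void main(String[] args) throws Exception {
        // CoNLL-2000-style training data; the path is a placeholder.
        File trainFile = new File("train.txt");

        try (ObjectStream<String> lines = new PlainTextByLineStream(
                     new MarkableFileInputStreamFactory(trainFile), StandardCharsets.UTF_8);
             ObjectStream<ChunkSample> samples = new ChunkSampleStream(lines)) {

            // Train with default maxent parameters; tune TrainingParameters as needed.
            ChunkerModel model = ChunkerME.train("en", samples,
                    TrainingParameters.defaultParams(), new ChunkerFactory());

            // Persist the model so it can be loaded later like en-chunker.bin above.
            try (OutputStream out = new BufferedOutputStream(
                    new FileOutputStream("en-custom-chunker.bin"))) {
                model.serialize(out);
            }
        }
    }
}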