opennlp.tools.sentdetect.SentenceModel Java Examples
The following examples show how to use
opennlp.tools.sentdetect.SentenceModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SentenceDetectionUnitTest.java From tutorials with MIT License | 6 votes |
/**
 * Loads the sentence model from the classpath resource {@code /models/en-sent.bin},
 * splits a fixed paragraph with {@link SentenceDetectorME} and checks that the expected
 * sentences are among the detected ones.
 *
 * @throws Exception if the model cannot be read
 */
@Test
public void givenEnglishModel_whenDetect_thenSentencesAreDetected() throws Exception {
    String paragraph = "This is a statement. This is another statement. Now is an abstract word for time, "
            + "that is always flying. And my email address is [email protected].";

    // try-with-resources releases the model stream even when an assertion or the
    // model constructor throws; the original never closed it.
    try (InputStream is = getClass().getResourceAsStream("/models/en-sent.bin")) {
        SentenceModel model = new SentenceModel(is);
        SentenceDetectorME sdetector = new SentenceDetectorME(model);

        String[] sentences = sdetector.sentDetect(paragraph);

        assertThat(sentences).contains(
                "This is a statement.",
                "This is another statement.",
                "Now is an abstract word for time, that is always flying.",
                "And my email address is [email protected].");
    }
}
Example #2
Source File: SentenceSegmenter.java From dexter with Apache License 2.0 | 6 votes |
public SentenceSegmenter() { InputStream modelIn = null; try { // Loading sentence detection model modelIn = getClass().getResourceAsStream("/nlp/en-sent.bin"); final SentenceModel sentenceModel = new SentenceModel(modelIn); modelIn.close(); sentenceDetector = new SentenceDetectorME(sentenceModel); } catch (final IOException ioe) { ioe.printStackTrace(); } finally { if (modelIn != null) { try { modelIn.close(); } catch (final IOException e) { } // oh well! } } }
Example #3
Source File: OpenNlpTartarus.java From scava with Eclipse Public License 2.0 | 6 votes |
/**
 * Obtains the logger, loads the PoS tagger and the sentence detector through the
 * class loader of this class, and creates the stemmer.
 *
 * <p>An {@link IOException} raised while loading is logged and its stack trace printed;
 * construction then continues with the stemmer.
 */
public OpenNlpTartarus() {
    logger = (OssmeterLogger) OssmeterLogger.getLogger("uk.ac.nactem.posstemmer");

    final ClassLoader classLoader = getClass().getClassLoader();
    try {
        posTaggerME = loadPoSME(classLoader, "models/en-pos-maxent.bin");
        simpleTokenizer = SimpleTokenizer.INSTANCE;
        sentenceDetector = new SentenceDetectorME(
                loadSentenceModel(classLoader, "models/en-sent.bin"));
        logger.info("Models have been sucessfully loaded");
    } catch (IOException loadFailure) {
        logger.error("Error while loading the model:", loadFailure);
        loadFailure.printStackTrace();
    }

    // Statistical tokenizer loading is currently disabled:
    // InputStream tokenizerModelInput = loadModelInput("models/en-token.bin");
    // TokenizerModel tokenizerModel = loadTokenizerModel(tokenizerModelInput);
    // tokenizerME = new TokenizerME(tokenizerModel);

    stemmer = new englishStemmer();
}
Example #4
Source File: SentenceDetect.java From datafu with Apache License 2.0 | 6 votes |
/**
 * Splits the single text field of {@code input} into sentences and returns them as a
 * bag with one single-field tuple per sentence.
 *
 * <p>The sentence detector is created on first use from the cached model file and kept
 * in {@code sdetector} for subsequent calls.
 *
 * @param input tuple that must contain exactly one field
 * @return a bag of sentences, or {@code null} when the field is {@code null} or empty
 * @throws IOException if the tuple does not have exactly one field or the model cannot
 *         be read
 */
public DataBag exec(Tuple input) throws IOException {
    if (input.size() != 1) {
        throw new IOException(
                "Expected exactly one input field but received " + input.size());
    }

    // Check the raw field first: calling toString() on a null field would throw
    // before the null test could ever run.
    Object field = input.get(0);
    if (field == null) {
        return null;
    }
    String inputString = field.toString();
    // isEmpty() instead of == "": the latter compares references, not content.
    if (inputString == null || inputString.isEmpty()) {
        return null;
    }

    DataBag outBag = bf.newDefaultBag();

    if (sdetector == null) {
        String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
        // Close the model file once the model has been built from it.
        try (InputStream buffer = new BufferedInputStream(new FileInputStream(loadFile))) {
            SentenceModel model = new SentenceModel(buffer);
            this.sdetector = new SentenceDetectorME(model);
        }
    }

    String[] sentences = this.sdetector.sentDetect(inputString);
    for (String sentence : sentences) {
        Tuple outTuple = tf.newTuple(sentence);
        outBag.add(outTuple);
    }
    return outBag;
}
Example #5
Source File: OpenNLPTokenizerFactory.java From jate with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Loads the configured OpenNLP resources through {@code loader}: the optional sentence
 * model, the mandatory tokenizer model and, when a class name is configured, the
 * paragraph chunker instance.
 *
 * @param loader resource loader used to open the model files
 * @throws IOException if the tokenizer model file is not configured, a model cannot be
 *         read, or the paragraph chunker class cannot be instantiated
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (sentenceModelFile != null) {
        // Close the resource stream once the model is built; it was previously leaked.
        try (java.io.InputStream sentenceModelStream = loader.openResource(sentenceModelFile)) {
            sentenceOp = new SentenceDetectorME(new SentenceModel(sentenceModelStream));
        }
    }

    if (tokenizerModelFile == null) {
        throw new IOException("Parameter 'tokenizerModle' is required, but is invalid:" + tokenizerModelFile);
    }
    try (java.io.InputStream tokenizerModelStream = loader.openResource(tokenizerModelFile)) {
        tokenizerOp = new TokenizerME(new TokenizerModel(tokenizerModelStream));
    }

    if (parChunkingClass != null) {
        try {
            Class<?> c = Class.forName(parChunkingClass);
            Object o = c.newInstance();
            paragraphChunker = (ParagraphChunker) o;
        } catch (Exception e) {
            // Reflection failures are reported through the declared IOException.
            throw new IOException(e);
        }
    }
}
Example #6
Source File: OpenNLP.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Loads the four OpenNLP language models from resources located next to this class and
 * then builds the sentence detector, tokenizer, PoS tagger and chunker from them.
 *
 * @param aContext the UIMA context (not used by this method)
 * @throws ResourceInitializationException if a model cannot be loaded or a tagger cannot
 *         be created
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    // Stage 1: load the shared language models.
    try {
        tokensModel.loadModel(
                TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
        sentencesModel.loadModel(
                SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
        posModel.loadModel(
                POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
        chunkModel.loadModel(
                ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
    } catch (BaleenException loadFailure) {
        getMonitor().error("Unable to load OpenNLP Language Models", loadFailure);
        throw new ResourceInitializationException(loadFailure);
    }

    // Stage 2: wrap the loaded models in their OpenNLP processors.
    try {
        final SentenceModel sentences = (SentenceModel) sentencesModel.getModel();
        sentenceDetector = new SentenceDetectorME(sentences);

        final TokenizerModel tokens = (TokenizerModel) tokensModel.getModel();
        wordTokenizer = new TokenizerME(tokens);

        final POSModel pos = (POSModel) posModel.getModel();
        posTagger = new POSTaggerME(pos);

        final ChunkerModel chunks = (ChunkerModel) chunkModel.getModel();
        phraseChunker = new ChunkerME(chunks);
    } catch (Exception creationFailure) {
        getMonitor().error("Unable to create OpenNLP taggers", creationFailure);
        throw new ResourceInitializationException(creationFailure);
    }
}
Example #7
Source File: SharedOpenNLPModelTest.java From baleen with Apache License 2.0 | 6 votes |
@Test public void testLoad() throws Exception { SharedOpenNLPModel m = new SharedOpenNLPModel(); m.loadModel(TokenizerModel.class, OpenNLP.class.getResourceAsStream("en_token.bin")); BaseModel bm = m.getModel(); assertNotNull(bm); assertTrue(bm instanceof TokenizerModel); assertEquals("en", bm.getLanguage()); // Trying to load a different model shouldn't change the resource m.loadModel(SentenceModel.class, OpenNLP.class.getResourceAsStream("en_sent.bin")); assertEquals(bm, m.getModel()); m.doDestroy(); }
Example #8
Source File: OpenNLPSentenceDetectionTest.java From java_in_examples with Apache License 2.0 | 6 votes |
/**
 * Runs the OpenNLP sentence detector over a short dialogue, printing each detected
 * sentence on its own line followed by the detected sentence spans.
 *
 * @param strings command-line arguments (not used)
 * @throws Exception if the model file cannot be read
 */
public static void main(String[] strings) throws Exception {
    String text = "“But I don’t want to go among mad people,” Alice remarked. "
            + "“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad. You’re mad.” "
            + "“How do you know I’m mad?” said Alice. "
            + "“You must be,” said the Cat, “or you wouldn’t have come here.”";

    try (InputStream modelStream =
            new FileInputStream(NATURAL_LANGUAGE_PROCESSING_SRC_MAIN_RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));

        String[] detectedSentences = detector.sentDetect(text);
        Span[] detectedSpans = detector.sentPosDetect(text);

        for (String detected : detectedSentences) {
            System.out.println(detected);
        }
        System.out.println(Arrays.deepToString(detectedSpans));
    }
}
Example #9
Source File: SentenceDetectors.java From java_in_examples with Apache License 2.0 | 5 votes |
/**
 * Splits {@code text} into sentences using an OpenNLP detector built from the model
 * file at {@code RESOURCES_EN_SENT_BIN}.
 *
 * @param text the text to split
 * @return the detected sentences
 * @throws Exception if the model file cannot be read
 */
private static String[] testOpenNLP(String text) throws Exception {
    try (InputStream modelStream = new FileInputStream(RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
        String[] detected = detector.sentDetect(text);
        return detected;
    }
}
Example #10
Source File: SentenceSplitterOpenNLP.java From jate with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Creates a sentence splitter from the OpenNLP sentence model stored at the given path.
 *
 * @param modelFile path of the sentence model file
 * @throws IOException if the file cannot be opened or the model cannot be read
 */
public SentenceSplitterOpenNLP(String modelFile) throws IOException {
    LOG.info("Initializing OpenNLP sentence splitter...");
    // try-with-resources keeps the primary failure when close() also throws; with the
    // former try/finally a close() failure would have replaced it.
    try (FileInputStream modelFileStream = new FileInputStream(modelFile)) {
        sentenceDetector = new SentenceDetectorME(new SentenceModel(modelFileStream));
    }
}
Example #11
Source File: OpenNlpModule.java From SciGraph with Apache License 2.0 | 5 votes |
/**
 * Provides a sentence detector built from the classpath resource
 * {@code /opennlp/en-sent.bin}.
 *
 * @return a new {@link SentenceDetectorME}
 * @throws IOException if the model cannot be read
 */
@CheckedProvides(SentenceDetectorProvider.class)
SentenceDetectorME getSentenceDetector() throws IOException {
    try (InputStream modelStream = getClass().getResourceAsStream("/opennlp/en-sent.bin")) {
        return new SentenceDetectorME(new SentenceModel(modelStream));
    }
}
Example #12
Source File: SentenceDetector.java From knowledge-extraction with Apache License 2.0 | 5 votes |
public SentenceDetector(String modelPath) { try (InputStream modelIn = SentenceDetector.class.getClassLoader() .getResourceAsStream(modelPath);){ SentenceModel model = new SentenceModel(modelIn); sentenceDetector = new SentenceDetectorME(model); } catch (IOException e) { e.printStackTrace(); } }
Example #13
Source File: SentenceDetectors.java From java_in_examples with Apache License 2.0 | 5 votes |
/**
 * Detects sentence positions in {@code text} using an OpenNLP detector built from the
 * model file at {@code RESOURCES_EN_SENT_BIN}.
 *
 * @param text the text to analyse
 * @return the spans of the detected sentences
 * @throws Exception if the model file cannot be read
 */
private static Span[] testOpenNLPPosition(String text) throws Exception {
    try (InputStream modelStream = new FileInputStream(RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
        Span[] positions = detector.sentPosDetect(text);
        return positions;
    }
}
Example #14
Source File: ONLPSentenceModel.java From biomedicus with Apache License 2.0 | 5 votes |
/**
 * Reads the OpenNLP sentence model from {@code path} and wraps it in an
 * {@code ONLPSentenceModel}.
 *
 * @return the wrapped model
 * @throws BiomedicusException if reading the model fails with an {@link IOException}
 */
@Override
protected ONLPSentenceModel loadModel() throws BiomedicusException {
    try (InputStream modelStream = Files.newInputStream(path)) {
        return new ONLPSentenceModel(new SentenceModel(modelStream));
    } catch (IOException readFailure) {
        throw new BiomedicusException(readFailure);
    }
}
Example #15
Source File: OpenNLPOpsFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a sentence detector operation for the named model.
 *
 * @param modelName key into {@code sentenceModels}; when {@code null}, the no-argument
 *        {@code NLPSentenceDetectorOp} is returned instead
 * @return a new sentence detector operation
 * @throws IOException declared by the {@code NLPSentenceDetectorOp} constructor
 */
public static NLPSentenceDetectorOp getSentenceDetector(String modelName) throws IOException {
    if (modelName == null) {
        return new NLPSentenceDetectorOp();
    }
    return new NLPSentenceDetectorOp(sentenceModels.get(modelName));
}
Example #16
Source File: SentenceDetectorFactory.java From wiseowl with MIT License | 5 votes |
/**
 * Loads the sentence detector from the file
 * {@code <modelDirectory>/<language>-sent.bin} and stores it in {@code detector}.
 *
 * @param language language prefix of the model file name
 * @param modelDirectory directory containing the model file
 * @throws IOException if the model file cannot be opened or read
 */
protected void loadSentenceDetector(String language, String modelDirectory) throws IOException {
    String modelFile = modelDirectory + File.separatorChar + language + "-sent.bin";
    log.info("Loading sentence model {}", modelFile);
    // Close the file once the model has been built; it was previously left open.
    try (InputStream modelStream = new FileInputStream(modelFile)) {
        SentenceModel model = new SentenceModel(modelStream);
        detector = new SentenceDetectorME(model);
    }
}
Example #17
Source File: NLPModelsStore.java From db with GNU Affero General Public License v3.0 | 5 votes |
private void initSetenceDetector() { SentenceModel sentenceModel; /* Load english sentence detector */ sentenceModel = getSentenceModel(NLPLanguages.ENGLISH); if(sentenceModel != null) { sentenceDetectorMap.put(NLPLanguages.ENGLISH.getLanguageCode(), new SentenceDetectorME(sentenceModel)); logger.debug("OpenNLP english sentence detector loaded successfully"); } //TODO: Load models for every other language to be supported }
Example #18
Source File: StemmingLemaEx.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
public static void main(String args[]){ String words[] = {"bank", "banking", "banks", "banker", "banked", "bankart"}; PorterStemmer ps = new PorterStemmer(); for(String w : words){ String stem = ps.stem(w); System.out.println("Word : " + w + " Stem : " + stem); } String paragraph = "When determining the end of sentences " + "we need to consider several factors. Sentences may end with " + "exclamation marks! Or possibly questions marks? Within " + "sentences we may find numbers like 3.14159, abbreviations " + "such as found in Mr. Smith, and possibly ellipses either " + "within a sentence …, or at the end of a sentence…"; String simple = "[.?!]"; String[] splitString = (paragraph.split(simple)); for (String string : splitString) { System.out.println(string); } System.out.println("-------------Using Pattern and Matcher-------------"); Pattern sentencePattern = Pattern.compile( "# Match a sentence ending in punctuation or EOS.\n" + "[^.!?\\s] # First char is non-punct, non-ws\n" + "[^.!?]* # Greedily consume up to punctuation.\n" + "(?: # Group for unrolling the loop.\n" + " [.!?] # (special) inner punctuation ok if\n" + " (?!['\"]?\\s|$) # not followed by ws or EOS.\n" + " [^.!?]* # Greedily consume up to punctuation.\n" + ")* # Zero or more (special normal*)\n" + "[.!?]? # Optional ending punctuation.\n" + "['\"]? 
# Optional closing quote.\n" + "(?=\\s|$)", Pattern.MULTILINE | Pattern.COMMENTS); Matcher matcher = sentencePattern.matcher(paragraph); while (matcher.find()) { System.out.println(matcher.group()); } System.out.println("-------------Using BreakIterator-------------"); BreakIterator si = BreakIterator.getSentenceInstance(); Locale cl = new Locale("en", "US"); si.setText(paragraph); int boundary = si.first(); while(boundary!=BreakIterator.DONE){ int begin = boundary; System.out.println(boundary + " - "); boundary = si.next(); int end = boundary; if(end == BreakIterator.DONE){ break; } System.out.println(boundary + " [ " + paragraph.substring(begin,end) + " ] "); } System.out.println("-------------Using SentenceDetectorME-------------"); try{ InputStream is = new FileInputStream(new File("/home/ashish/Downloads/" + "en-sent.bin")); SentenceModel sm = new SentenceModel(is); SentenceDetectorME detector = new SentenceDetectorME(sm); String sentences [] = detector.sentDetect(paragraph); for(String s : sentences){ System.out.println(s); } } catch(IOException e){ System.out.println("Error Detected" + e); e.printStackTrace(); } }
Example #19
Source File: SentenceSplitterOpenNLP.java From jate with GNU Lesser General Public License v3.0 | 4 votes |
public SentenceSplitterOpenNLP(InputStream model) throws IOException { LOG.info("Initializing OpenNLP sentence splitter..."); sentenceDetector = new SentenceDetectorME(new SentenceModel(model)); }
Example #20
Source File: SentenceSplitterOpenNLP.java From jate with GNU Lesser General Public License v3.0 | 4 votes |
public SentenceSplitterOpenNLP(File modelFile) throws IOException { LOG.info("Initializing OpenNLP sentence splitter..."); sentenceDetector = new SentenceDetectorME(new SentenceModel(modelFile)); }
Example #21
Source File: NLPSentenceDetectorOp.java From lucene-solr with Apache License 2.0 | 4 votes |
public NLPSentenceDetectorOp(SentenceModel model) throws IOException { sentenceSplitter = new SentenceDetectorME(model); }
Example #22
Source File: ONLPSentenceModel.java From biomedicus with Apache License 2.0 | 2 votes |
/**
 * Wraps an OpenNLP {@link SentenceModel}; the wrapped model is the one used to create
 * the OpenNLP sentence detector.
 *
 * @param model the sentence model to hold
 */
private ONLPSentenceModel(SentenceModel model) {
    this.model = model;
}