Java Code Examples for opennlp.tools.sentdetect.SentenceDetectorME#sentDetect()
The following examples show how to use
opennlp.tools.sentdetect.SentenceDetectorME#sentDetect().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OpenNLPSentenceDetectionTest.java From java_in_examples with Apache License 2.0 | 6 votes |
/**
 * Demo entry point: loads a sentence model from the configured file, runs the
 * detector over a short quoted passage, and prints each detected sentence
 * followed by the array of sentence spans.
 *
 * @param strings command-line arguments (unused)
 * @throws Exception propagated from opening the model file or loading the model
 */
public static void main(String[] strings) throws Exception {
    final String text =
            "“But I don’t want to go among mad people,” Alice remarked. "
            + "“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad. You’re mad.” "
            + "“How do you know I’m mad?” said Alice. "
            + "“You must be,” said the Cat, “or you wouldn’t have come here.”";

    try (InputStream modelStream =
                 new FileInputStream(NATURAL_LANGUAGE_PROCESSING_SRC_MAIN_RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));

        // Same input, two views: the sentence text and the character spans.
        String[] detected = detector.sentDetect(text);
        Span[] spans = detector.sentPosDetect(text);

        for (int i = 0; i < detected.length; i++) {
            System.out.println(detected[i]);
        }
        System.out.println(Arrays.deepToString(spans));
    }
}
Example 2
Source File: POSStructureScorer.java From uncc2014watsonsim with GNU General Public License v2.0 | 6 votes |
public static Parse[] parsePassageText(String p) throws InvalidFormatException{ //initialize SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentenceModel); Parser parser = ParserFactory.create( parserModel, 20, // beam size 0.95); // advance percentage String[] sentences = sentenceDetector.sentDetect(p); Parse[] results = new Parse[sentences.length]; for (int i=0;i<sentences.length;i++){ String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]); String sent= StringUtils.join(tks," "); System.out.println("Found sentence " + sent); Parse[] sentResults = ParserTool.parseLine(sent,parser, 1); results[i]=sentResults[0]; } return results; }
Example 3
Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0 | 6 votes |
public Parse[] parsePassageText(String p) throws InvalidFormatException{ if (!modelsAreInitialized)init(); //initialize SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel); Parser parser = ParserFactory.create( this.parserModel, 20, // beam size 0.95); // advance percentage //find sentences, tokenize each, parse each, return top parse for each String[] sentences = sentenceDetector.sentDetect(p); Parse[] results = new Parse[sentences.length]; for (int i=0;i<sentences.length;i++){ String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]); //StringTokenizer st = new StringTokenizer(tks[i]); //There are several tokenizers available. SimpleTokenizer works best String sent= StringUtils.join(tks," "); System.out.println("Found sentence " + sent); Parse[] sentResults = ParserTool.parseLine(sent,parser, 1); results[i]=sentResults[0]; } return results; }
Example 4
Source File: SentenceDetectionUnitTest.java From tutorials with MIT License | 6 votes |
/**
 * Loads the sentence model from the {@code /models/en-sent.bin} classpath
 * resource, runs sentence detection over a short paragraph, and asserts that
 * the expected sentences are among the results.
 *
 * @throws Exception propagated from reading the resource or loading the model
 */
@Test
public void givenEnglishModel_whenDetect_thenSentencesAreDetected() throws Exception {
    String paragraph = "This is a statement. This is another statement. Now is an abstract word for time, "
            + "that is always flying. And my email address is google@gmail.com.";

    String[] sentences;
    // try-with-resources so the model stream is closed once the model has been loaded,
    // even if loading or detection throws.
    try (InputStream is = getClass().getResourceAsStream("/models/en-sent.bin")) {
        SentenceModel model = new SentenceModel(is);
        SentenceDetectorME sdetector = new SentenceDetectorME(model);
        sentences = sdetector.sentDetect(paragraph);
    }

    assertThat(sentences).contains(
            "This is a statement.",
            "This is another statement.",
            "Now is an abstract word for time, that is always flying.",
            "And my email address is google@gmail.com.");
}
Example 5
Source File: SentenceDetectors.java From java_in_examples with Apache License 2.0 | 5 votes |
/**
 * Loads the sentence model from the configured file and splits the given text
 * into sentences.
 *
 * @param text the text to split
 * @return the detected sentences
 * @throws Exception propagated from opening the model file or loading the model
 */
private static String[] testOpenNLP(String text) throws Exception {
    try (InputStream modelStream = new FileInputStream(RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
        String[] sentences = detector.sentDetect(text);
        return sentences;
    }
}
Example 6
Source File: StephensonOpenNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
/**
 * Runs the sentence detector over the given text and writes each detected
 * sentence to standard error, one per line, prefixed with {@code "sent: "}.
 *
 * @param testSents the text to split into sentences
 * @throws InvalidFormatException declared by the signature for model-related failures
 */
public void testSentDetector(String testSents) throws InvalidFormatException {
    init();
    String[] detected = new SentenceDetectorME(this.sentenceModel).sentDetect(testSents);
    for (String sentence : detected) {
        System.err.println("sent: " + sentence);
    }
}
Example 7
Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
/**
 * Initializes the models, splits the given text into sentences, and prints
 * every sentence to standard error with a {@code "sent: "} prefix.
 *
 * @param testSents the text to split into sentences
 * @throws InvalidFormatException declared by the signature for model-related failures
 */
public void testSentDetector(String testSents) throws InvalidFormatException {
    init();
    SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
    for (String sentence : detector.sentDetect(testSents)) {
        System.err.println("sent: " + sentence);
    }
}
Example 8
Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
public Parse[] parsePassageText(String p) throws InvalidFormatException{ if (!modelsAreInitialized)init(); //initialize SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel); NameFinderME nameFinder = new NameFinderME(this.nerModel); Parser parser = ParserFactory.create( this.parserModel, 20, // beam size 0.95); // advance percentage //find sentences, tokenize each, parse each, return top parse for each String[] sentences = sentenceDetector.sentDetect(p); Parse[] results = new Parse[sentences.length]; for (int i=0;i<sentences.length;i++){ //String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]); //StringTokenizer st = new StringTokenizer(tks[i]); //There are several tokenizers available. SimpleTokenizer works best Tokenizer tokenizer = SimpleTokenizer.INSTANCE; for (int si = 0; si < sentences.length; si++) { Span[] tokenSpans = tokenizer.tokenizePos(sentences[si]); String[] tokens = Span.spansToStrings(tokenSpans, sentences[si]); Span[] names = nameFinder.find(tokens); for (int ni = 0; ni < names.length; ni++) { Span startSpan = tokenSpans[names[ni].getStart()]; int nameStart = startSpan.getStart(); Span endSpan = tokenSpans[names[ni].getEnd() - 1]; int nameEnd = endSpan.getEnd(); String name = sentences[si].substring(nameStart, nameEnd); System.out.println(name); } } String sent= StringUtils.join(tokenizer," "); System.out.println("Found sentence " + sent); Parse[] sentResults = ParserTool.parseLine(sent,parser, 1); results[i]=sentResults[0]; } return results; }
Example 9
Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0 | 5 votes |
/**
 * Splits the given text into sentences and echoes each one to standard error
 * as {@code "sent: <sentence>"}.
 *
 * @param testSents the text to split into sentences
 * @throws InvalidFormatException declared by the signature for model-related failures
 */
public void testSentDetector(String testSents) throws InvalidFormatException {
    init();
    SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
    String[] detected = detector.sentDetect(testSents);
    int position = 0;
    while (position < detected.length) {
        System.err.println("sent: " + detected[position]);
        position++;
    }
}
Example 10
Source File: StemmingLemaEx.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 4 votes |
public static void main(String args[]){ String words[] = {"bank", "banking", "banks", "banker", "banked", "bankart"}; PorterStemmer ps = new PorterStemmer(); for(String w : words){ String stem = ps.stem(w); System.out.println("Word : " + w + " Stem : " + stem); } String paragraph = "When determining the end of sentences " + "we need to consider several factors. Sentences may end with " + "exclamation marks! Or possibly questions marks? Within " + "sentences we may find numbers like 3.14159, abbreviations " + "such as found in Mr. Smith, and possibly ellipses either " + "within a sentence …, or at the end of a sentence…"; String simple = "[.?!]"; String[] splitString = (paragraph.split(simple)); for (String string : splitString) { System.out.println(string); } System.out.println("-------------Using Pattern and Matcher-------------"); Pattern sentencePattern = Pattern.compile( "# Match a sentence ending in punctuation or EOS.\n" + "[^.!?\\s] # First char is non-punct, non-ws\n" + "[^.!?]* # Greedily consume up to punctuation.\n" + "(?: # Group for unrolling the loop.\n" + " [.!?] # (special) inner punctuation ok if\n" + " (?!['\"]?\\s|$) # not followed by ws or EOS.\n" + " [^.!?]* # Greedily consume up to punctuation.\n" + ")* # Zero or more (special normal*)\n" + "[.!?]? # Optional ending punctuation.\n" + "['\"]? 
# Optional closing quote.\n" + "(?=\\s|$)", Pattern.MULTILINE | Pattern.COMMENTS); Matcher matcher = sentencePattern.matcher(paragraph); while (matcher.find()) { System.out.println(matcher.group()); } System.out.println("-------------Using BreakIterator-------------"); BreakIterator si = BreakIterator.getSentenceInstance(); Locale cl = new Locale("en", "US"); si.setText(paragraph); int boundary = si.first(); while(boundary!=BreakIterator.DONE){ int begin = boundary; System.out.println(boundary + " - "); boundary = si.next(); int end = boundary; if(end == BreakIterator.DONE){ break; } System.out.println(boundary + " [ " + paragraph.substring(begin,end) + " ] "); } System.out.println("-------------Using SentenceDetectorME-------------"); try{ InputStream is = new FileInputStream(new File("/home/ashish/Downloads/" + "en-sent.bin")); SentenceModel sm = new SentenceModel(is); SentenceDetectorME detector = new SentenceDetectorME(sm); String sentences [] = detector.sentDetect(paragraph); for(String s : sentences){ System.out.println(s); } } catch(IOException e){ System.out.println("Error Detected" + e); e.printStackTrace(); } }
Example 11
Source File: KensNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0 | 4 votes |
/**
 * Splits the text of a passage into sentences.
 *
 * @param p the passage whose {@code text} field is split
 * @return the detected sentences
 * @throws InvalidFormatException declared by the signature for model-related failures
 */
public String[] DivideIntoSentences(Passage p) throws InvalidFormatException {
    init();
    SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
    String[] sentences = detector.sentDetect(p.text);
    return sentences;
}