opennlp.tools.sentdetect.SentenceDetectorME#sentDetect

Source File: OpenNLPSentenceDetectionTest.java From java_in_examples with Apache License 2.0

6 votes

/**
 * Demo entry point: loads a sentence model from the configured resource path,
 * runs sentence detection over a short hard-coded passage, prints each detected
 * sentence on its own line, and finally prints the array of sentence spans.
 *
 * @param strings command-line arguments (not read)
 * @throws Exception if the model file cannot be opened or loaded
 */
public static void main(String[] strings) throws Exception {
    String text = "“But I don’t want to go among mad people,” Alice remarked. " +
            "“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad. You’re mad.” " +
            "“How do you know I’m mad?” said Alice. " +
            "“You must be,” said the Cat, “or you wouldn’t have come here.”";

    // The stream is only needed while the model is constructed and used below.
    try (InputStream modelIn = new FileInputStream(NATURAL_LANGUAGE_PROCESSING_SRC_MAIN_RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelIn));

        // Run both detections first, then print, so output order is: sentences, then spans.
        String[] detected = detector.sentDetect(text);
        Span[] detectedSpans = detector.sentPosDetect(text);

        for (int idx = 0; idx < detected.length; idx++) {
            System.out.println(detected[idx]);
        }
        System.out.println(Arrays.deepToString(detectedSpans));
    }
}

Source File: POSStructureScorer.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

/**
 * Splits a passage into sentences and parses each one, keeping the first
 * parse returned for every sentence.
 *
 * @param p the passage text to split and parse
 * @return one {@code Parse} per detected sentence, in detection order
 * @throws InvalidFormatException declared by the signature
 */
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
	SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
	// Arguments after the model: beam size 20, advance percentage 0.95.
	Parser chartParser = ParserFactory.create(parserModel, 20, 0.95);

	String[] detected = detector.sentDetect(p);
	Parse[] parses = new Parse[detected.length];
	int slot = 0;
	for (String sentence : detected) {
		// Tokenize, then re-join with single spaces so the parser receives
		// whitespace-separated tokens.
		String spaced = StringUtils.join(SimpleTokenizer.INSTANCE.tokenize(sentence), " ");
		System.out.println("Found sentence " + spaced);
		// Ask for a single parse and keep it.
		Parse[] topParses = ParserTool.parseLine(spaced, chartParser, 1);
		parses[slot] = topParses[0];
		slot++;
	}
	return parses;
}

Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

/**
 * Finds the sentences in a passage, tokenizes each, parses each, and returns
 * the top parse for each sentence.
 *
 * @param p the passage text to split and parse
 * @return one {@code Parse} per detected sentence, in detection order
 * @throws InvalidFormatException declared by the signature
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized) {
		init();
	}

	SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	final int beamSize = 20;
	final double advancePercentage = 0.95;
	Parser chartParser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

	String[] detected = detector.sentDetect(p);
	final int count = detected.length;
	Parse[] parses = new Parse[count];
	for (int idx = 0; idx < count; idx++) {
		// There are several tokenizers available. SimpleTokenizer works best.
		String[] tokens = SimpleTokenizer.INSTANCE.tokenize(detected[idx]);
		String spaced = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + spaced);

		// Request exactly one parse per sentence and store it.
		parses[idx] = ParserTool.parseLine(spaced, chartParser, 1)[0];
	}
	return parses;
}

Source File: SentenceDetectionUnitTest.java From tutorials with MIT License

6 votes

/**
 * Loads the sentence model from the classpath resource {@code /models/en-sent.bin}
 * and checks that the detector output contains each of the four expected sentences.
 *
 * @throws Exception if the model resource cannot be read or loaded
 */
@Test
public void givenEnglishModel_whenDetect_thenSentencesAreDetected() throws Exception {

    String paragraph = "This is a statement. This is another statement. Now is an abstract word for time, "
            + "that is always flying. And my email address is [email protected].";

    SentenceModel model;
    // Close the resource stream as soon as the model has been built from it;
    // the original left the stream open for the lifetime of the test JVM.
    try (InputStream is = getClass().getResourceAsStream("/models/en-sent.bin")) {
        model = new SentenceModel(is);
    }

    SentenceDetectorME sdetector = new SentenceDetectorME(model);

    String[] sentences = sdetector.sentDetect(paragraph);
    assertThat(sentences).contains("This is a statement.",
            "This is another statement.",
            "Now is an abstract word for time, that is always flying.",
            "And my email address is [email protected].");
}

Source File: SentenceDetectors.java From java_in_examples with Apache License 2.0

5 votes

/**
 * Loads the sentence model from {@code RESOURCES_EN_SENT_BIN} and returns the
 * sentences detected in the given text.
 *
 * @param text the text to split into sentences
 * @return the detected sentences
 * @throws Exception if the model file cannot be opened or loaded
 */
private static String[] testOpenNLP(String text) throws Exception {
    try (InputStream modelStream = new FileInputStream(RESOURCES_EN_SENT_BIN)) {
        SentenceModel loadedModel = new SentenceModel(modelStream);
        String[] detected = new SentenceDetectorME(loadedModel).sentDetect(text);
        return detected;
    }
}

Source File: StephensonOpenNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Runs sentence detection on the given text and writes every detected
 * sentence to standard error, each prefixed with {@code "sent: "}.
 *
 * @param testSents the text to split into sentences
 * @throws InvalidFormatException declared by the signature
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	for (String sentence : detector.sentDetect(testSents)) {
		System.err.println("sent: "+sentence);
	}
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Diagnostic helper: splits the input into sentences and echoes each one to
 * standard error with a {@code "sent: "} prefix.
 *
 * @param testSents the text to split into sentences
 * @throws InvalidFormatException declared by the signature
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	String[] detected = detector.sentDetect(testSents);
	int idx = 0;
	while (idx < detected.length) {
		System.err.println("sent: "+detected[idx]);
		idx++;
	}
}

Source File: NERScorer.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Finds the sentences in a passage and, for each sentence, tokenizes it,
 * prints every name the name finder reports in it, parses it, and keeps the
 * top parse.
 *
 * <p>Fixes relative to the previous version:
 * <ul>
 *   <li>Name finding now runs once per sentence. Previously an inner loop
 *       re-scanned <em>all</em> sentences on every outer iteration, so each
 *       name was printed once per sentence in the passage.</li>
 *   <li>The text handed to the parser is the sentence's tokens joined by
 *       single spaces. Previously the {@code Tokenizer} object itself was
 *       passed to {@code StringUtils.join}, so the parser never received the
 *       sentence text.</li>
 * </ul>
 *
 * @param p the passage text to split and parse
 * @return one {@code Parse} per detected sentence, in detection order
 * @throws InvalidFormatException declared by the signature
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	//initialize
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	//There are several tokenizers available. SimpleTokenizer works best
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

	//find sentences, tokenize each, parse each, return top parse for each
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i = 0; i < sentences.length; i++) {
		String sentence = sentences[i];
		// Token spans are character offsets into the sentence; the strings
		// derived from them feed both the name finder and the parser.
		Span[] tokenSpans = tokenizer.tokenizePos(sentence);
		String[] tokens = Span.spansToStrings(tokenSpans, sentence);

		// Name spans index into the token array (end exclusive); map them
		// back to character offsets to print the surface text of each name.
		for (Span nameSpan : nameFinder.find(tokens)) {
			int nameStart = tokenSpans[nameSpan.getStart()].getStart();
			int nameEnd = tokenSpans[nameSpan.getEnd() - 1].getEnd();
			System.out.println(sentence.substring(nameStart, nameEnd));
		}

		String sent = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}

Source File: OpenNlpTests.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

/**
 * Splits the supplied text into sentences and prints each one to standard
 * error, prefixed with {@code "sent: "}.
 *
 * @param testSents the text to split into sentences
 * @throws InvalidFormatException declared by the signature
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	String[] detected = new SentenceDetectorME(this.sentenceModel).sentDetect(testSents);
	for (String current : detected) {
		System.err.println("sent: "+current);
	}
}

Source File: StemmingLemaEx.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

4 votes

/**
 * Demonstrates Porter stemming on a few words, then four ways of splitting a
 * sample paragraph into sentences: a plain regex split, a commented
 * {@link Pattern} with a {@link Matcher}, {@link BreakIterator}, and OpenNLP's
 * {@code SentenceDetectorME}.
 *
 * <p>The model file is now opened in a try-with-resources block so the stream
 * is closed even when model loading fails; the unused {@code Locale} local
 * was removed.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String args[]){
    String words[] = {"bank", "banking", "banks", "banker", "banked",
"bankart"};
    PorterStemmer ps = new PorterStemmer();
    for(String w : words){
        String stem = ps.stem(w);
        System.out.println("Word : " + w + " Stem : " + stem);
    }
    String paragraph = "When determining the end of sentences "
        + "we need to consider several factors. Sentences may end with "
        + "exclamation marks! Or possibly questions marks? Within "
        + "sentences we may find numbers like 3.14159, abbreviations "
        + "such as found in Mr. Smith, and possibly ellipses either "
        + "within a sentence …, or at the end of a sentence…";

    // Approach 1: split on every '.', '?' or '!' character.
    String simple = "[.?!]";
    String[] splitString = (paragraph.split(simple));
    for (String string : splitString) {
        System.out.println(string);
    }

    // Approach 2: a regex written in COMMENTS mode, so the '#' remarks and
    // the padding whitespace inside the pattern text are ignored by the engine.
    System.out.println("-------------Using Pattern and Matcher-------------");
    Pattern sentencePattern = Pattern.compile(
        "# Match a sentence ending in punctuation or EOS.\n"
        + "[^.!?\\s]    # First char is non-punct, non-ws\n"
        + "[^.!?]*      # Greedily consume up to punctuation.\n"
        + "(?:          # Group for unrolling the loop.\n"
        + "  [.!?]      # (special) inner punctuation ok if\n"
        + "  (?!['\"]?\\s|$)  # not followed by ws or EOS.\n"
        + "  [^.!?]*    # Greedily consume up to punctuation.\n"
        + ")*           # Zero or more (special normal*)\n"
        + "[.!?]?       # Optional ending punctuation.\n"
        + "['\"]?       # Optional closing quote.\n"
        + "(?=\\s|$)",
        Pattern.MULTILINE | Pattern.COMMENTS);
    Matcher matcher = sentencePattern.matcher(paragraph);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }

    // Approach 3: walk the sentence boundaries reported by BreakIterator.
    System.out.println("-------------Using BreakIterator-------------");
    BreakIterator si = BreakIterator.getSentenceInstance();
    si.setText(paragraph);
    int boundary = si.first();
    while(boundary!=BreakIterator.DONE){
        int begin = boundary;
        System.out.println(boundary + " - ");
        boundary = si.next();
        int end = boundary;
        if(end == BreakIterator.DONE){
            break;
        }
        System.out.println(boundary + " [ " + paragraph.substring(begin,end) + " ] ");
    }

    // Approach 4: OpenNLP sentence detector backed by a model file on disk.
    System.out.println("-------------Using SentenceDetectorME-------------");
    // try-with-resources closes the model stream on both success and failure.
    try (InputStream is = new FileInputStream(new File("/home/ashish/Downloads/" + "en-sent.bin"))) {
        SentenceModel sm = new SentenceModel(is);
        SentenceDetectorME detector = new SentenceDetectorME(sm);
        String sentences [] = detector.sentDetect(paragraph);
        for(String s : sentences){
            System.out.println(s);
        }
    }
    catch(IOException e){
        System.out.println("Error Detected" + e);
        e.printStackTrace();
    }
}

Source File: KensNLPScorer.java From uncc2014watsonsim with GNU General Public License v2.0

4 votes

/**
 * Returns the sentences detected in the text of the given passage.
 *
 * @param p the passage whose {@code text} field is split into sentences
 * @return the detected sentences
 * @throws InvalidFormatException declared by the signature
 */
public String[] DivideIntoSentences(Passage p) throws InvalidFormatException{
	init();
	SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	String[] detected = detector.sentDetect(p.text);
	return detected;
}

Java Code Examples for opennlp.tools.sentdetect.SentenceDetectorME#sentDetect()