java.text.BreakIterator#next

Source File: TextComponent.java From openjdk-jdk8u with GNU General Public License v2.0

6 votes

/**
 * Shared implementation for forward and backward word searches.
 * Walks the boundaries of {@code words} away from {@code index} and returns
 * the boundary that opens the first span containing a letter, or
 * {@code BreakIterator.DONE} when no such span exists.
 * The method assumes that s is the text assigned to words.
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661.
    // A word iterator differs from character and sentence iterators: the
    // end of one word need not be the start of the next, so each span
    // between two boundaries is checked for a letter. See the
    // java.text.BreakIterator JavaDoc (nextWordStartAfter example).
    final boolean forward = (direction == NEXT);

    int boundary = forward ? words.following(index) : words.preceding(index);
    int candidate = forward ? words.next() : words.previous();

    while (candidate != BreakIterator.DONE) {
        // The two boundaries are in descending order when searching backward.
        final int lo = Math.min(boundary, candidate);
        final int hi = Math.max(boundary, candidate);
        for (int pos = lo; pos < hi; pos++) {
            if (Character.isLetter(s.charAt(pos))) {
                return boundary;
            }
        }
        boundary = candidate;
        candidate = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}

Source File: TextComponent.java From JDKSourceCode1.8 with MIT License

6 votes

/**
 * Shared implementation for forward and backward word searches.
 * Walks the boundaries of {@code words} away from {@code index} and returns
 * the boundary that opens the first span containing a letter, or
 * {@code BreakIterator.DONE} when no such span exists.
 * The method assumes that s is the text assigned to words.
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661.
    // A word iterator differs from character and sentence iterators: the
    // end of one word need not be the start of the next, so each span
    // between two boundaries is checked for a letter. See the
    // java.text.BreakIterator JavaDoc (nextWordStartAfter example).
    final boolean forward = (direction == NEXT);

    int boundary = forward ? words.following(index) : words.preceding(index);
    int candidate = forward ? words.next() : words.previous();

    while (candidate != BreakIterator.DONE) {
        // The two boundaries are in descending order when searching backward.
        final int lo = Math.min(boundary, candidate);
        final int hi = Math.max(boundary, candidate);
        for (int pos = lo; pos < hi; pos++) {
            if (Character.isLetter(s.charAt(pos))) {
                return boundary;
            }
        }
        boundary = candidate;
        candidate = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}

Source File: TextComponent.java From openjdk-jdk9 with GNU General Public License v2.0

6 votes

/**
 * Shared implementation for forward and backward word searches.
 * Walks the boundaries of {@code words} away from {@code index} and returns
 * the boundary that opens the first span containing a letter, or
 * {@code BreakIterator.DONE} when no such span exists.
 * The method assumes that s is the text assigned to words.
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661.
    // A word iterator differs from character and sentence iterators: the
    // end of one word need not be the start of the next, so each span
    // between two boundaries is checked for a letter. See the
    // java.text.BreakIterator JavaDoc (nextWordStartAfter example).
    final boolean forward = (direction == NEXT);

    int boundary = forward ? words.following(index) : words.preceding(index);
    int candidate = forward ? words.next() : words.previous();

    while (candidate != BreakIterator.DONE) {
        // The two boundaries are in descending order when searching backward.
        final int lo = Math.min(boundary, candidate);
        final int hi = Math.max(boundary, candidate);
        for (int pos = lo; pos < hi; pos++) {
            if (Character.isLetter(s.charAt(pos))) {
                return boundary;
            }
        }
        boundary = candidate;
        candidate = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}

Source File: SimpleTokenAndSentenceAnnotator.java From uima-uimaj with Apache License 2.0

6 votes

/**
 * Creates one annotation for every span between two consecutive boundaries
 * that {@code b} reports over {@code input}, skipping spans that consist
 * solely of whitespace.
 *
 * @param m factory used to create the annotation for a span
 * @param b break iterator that defines the spans; its text is replaced with
 *          {@code input}
 */
void makeAnnotations(Maker m, BreakIterator b) {
  b.setText(input);
  // first() must be read before next(): calling first() after next() rewinds
  // the iterator, so the first boundary would be reported twice and a
  // zero-length span would be scanned for nothing.
  int start = b.first();
  for (int end = b.next(); end != BreakIterator.DONE; start = end, end = b.next()) {
    // eliminate all-whitespace tokens
    boolean isWhitespace = true;
    for (int i = start; i < end; i++) {
      if (!Character.isWhitespace(input.charAt(i))) {
        isWhitespace = false;
        break;
      }
    }
    if (!isWhitespace) {
      m.newAnnotation(jcas, start, end).addToIndexes();
    }
  }
}

Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0

5 votes

/**
 * Bug 4068137: the word iterator over "boo." must report boundaries at the
 * start of the string, before the period, and at the end of the string.
 */
public void TestEndBehavior()
{
    final String text = "boo.";
    final BreakIterator words = BreakIterator.getWordInstance();
    words.setText(text);

    final int start = words.first();
    if (start != 0) {
        errln("Didn't get break at beginning of string.");
    }

    final int beforePeriod = words.next();
    if (beforePeriod != 3) {
        errln("Didn't get break before period in \"boo.\"");
    }

    // Advance only when the iterator is not already sitting at the end.
    if (words.current() != 4) {
        if (words.next() != 4) {
            errln("Didn't get break at end of string.");
        }
    }
}

Source File: TestSplittingBreakIterator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Renders the boundaries of {@code bi} over {@code text} as a marker line
 * with one character per offset (0 through {@code text.length()}): '^' at
 * every boundary, a space everywhere else.
 */
private String readBoundariesToString(BreakIterator bi, String text) {
  // one slot per character plus one for the end-of-text offset
  final int slots = text.length() + 1;
  StringBuilder line = new StringBuilder(slots);
  for (int k = 0; k < slots; k++) {
    line.append(' ');
  }

  bi.setText(text);
  int offset = bi.current();
  while (offset != BreakIterator.DONE) {
    line.setCharAt(offset, '^');
    offset = bi.next();
  }
  return line.toString();
}

Source File: SexpBaseForwardHandler.java From e4macs with Eclipse Public License 1.0

5 votes

/**
 * Advances {@code iter} by one boundary and, unless the iterator is
 * exhausted, adjusts that boundary through checkDot and then checkUnder.
 *
 * @see com.mulgasoft.emacsplus.commands.SexpHandler#getNextPosition(org.eclipse.jface.text.IDocument, java.text.BreakIterator)
 */
@Override
protected int getNextPosition(IDocument document, BreakIterator iter) {
	final int origin = iter.current();
	final int boundary = iter.next();
	if (boundary == BreakIterator.DONE) {
		// nothing left to iterate over; hand DONE back untouched
		return boundary;
	}
	final int afterDot = checkDot(document, origin, boundary);
	return checkUnder(document, afterDot);
}

Source File: DictionaryResource.java From newsleak with GNU Affero General Public License v3.0

5 votes

/**
 * Checks if a String is a multi word unit.
 * <p>
 * The string is scanned with a word {@code BreakIterator} for
 * {@code locale}; half the number of reported boundaries (integer
 * division) is taken as the token count.
 *
 * @param t
 *            the string to check
 * @return true, if more than one token was counted
 */
private boolean isMultiWord(String t) {
	BreakIterator wordBreaks = BreakIterator.getWordInstance(locale);
	wordBreaks.setText(t);

	// count every boundary, including the one at offset 0
	int boundaryCount = 0;
	for (int b = wordBreaks.first(); b != BreakIterator.DONE; b = wordBreaks.next()) {
		boundaryCount++;
	}

	int tokenEstimate = boundaryCount / 2;
	return tokenEstimate > 1;
}

Source File: MirroredBreakIterator.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Snapshots every boundary reported by {@code bi}, from {@code first()}
 * until DONE, into an unmodifiable list and sets {@code charIndex} to the
 * first boundary.
 */
MirroredBreakIterator(BreakIterator bi) {
    List<Integer> collected = new ArrayList<Integer>();
    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }
    boundaries = Collections.unmodifiableList(collected);
}

Source File: MirroredBreakIterator.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Snapshots every boundary reported by {@code bi}, from {@code first()}
 * until DONE, into an unmodifiable list and sets {@code charIndex} to the
 * first boundary.
 */
MirroredBreakIterator(BreakIterator bi) {
    List<Integer> collected = new ArrayList<Integer>();
    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }
    boundaries = Collections.unmodifiableList(collected);
}

Source File: NavigationActions.java From RichTextFX with BSD 2-Clause "Simplified" License

5 votes

/**
 * Skips n number of word boundaries forward, starting from the caret
 * position, and moves to the boundary reached. Does nothing when the
 * text is empty.
 */
default void wordBreaksForwards(int n, SelectionPolicy selectionPolicy) {
    if (getLength() == 0) {
        return;
    }

    BreakIterator breaks = BreakIterator.getWordInstance();
    breaks.setText(getText());

    // following() already steps over the first boundary after the caret,
    // so only the remaining n - 1 boundaries are stepped over below.
    breaks.following(getCaretPosition());
    int crossed = 1;
    while (crossed < n) {
        breaks.next();
        crossed++;
    }

    moveTo(breaks.current(), selectionPolicy);
}

Source File: MtasDocumentIndex.java From inception with Apache License 2.0

5 votes

/**
 * Turns a raw words query into a query of double-quoted tokens. A query
 * that already contains any of the characters {@code " [ ] { } < >} is
 * returned unchanged.
 * <p>
 * The query is split with a word {@code BreakIterator}; whitespace-only
 * tokens are dropped, and a single space is appended after every token
 * (dropped or not) that is followed by another token.
 *
 * @param aQuery the query as entered
 * @return the converted query, or {@code aQuery} itself when it already
 *         contains one of the characters listed above
 */
private String preprocessQuery(String aQuery)
{
    boolean hasQuerySyntax = aQuery.contains("\"") || aQuery.contains("[")
            || aQuery.contains("]") || aQuery.contains("{") || aQuery.contains("}")
            || aQuery.contains("<") || aQuery.contains(">");
    if (hasQuerySyntax) {
        return aQuery;
    }

    // Convert raw words query to a Mtas CQP query
    StringBuilder converted = new StringBuilder();
    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(aQuery);

    int tokenStart = words.first();
    int tokenEnd = words.next();
    while (tokenEnd != BreakIterator.DONE) {
        String token = aQuery.substring(tokenStart, tokenEnd);
        if (!token.trim().isEmpty()) {
            // Add the word to the query
            converted.append("\"").append(token).append("\"");
        }
        tokenStart = tokenEnd;
        tokenEnd = words.next();
        if (tokenEnd != BreakIterator.DONE) {
            converted.append(" ");
        }
    }
    return converted.toString();
}

Source File: DatePicker.java From nebula with Eclipse Public License 2.0

5 votes

/**
 * Set / update the text of the day labels, i.e. the week column headers
 * above the days on the calendar part of the <code>CDateTime</code>.
 * <p>
 * Starting at the calendar's first day of the week, each label receives
 * the "E"-formatted day name. Labels flagged as compact show a single
 * character-iterator unit of that name: the first one for left-to-right
 * locales other than "zh", the last one otherwise.
 */
private void updateDaysOfWeek() {
    if (dayPanel == null) {
        return;
    }

    Calendar day = cdt.getCalendarInstance();
    day.set(Calendar.DAY_OF_WEEK, day.getFirstDayOfWeek());

    Locale locale = cdt.getLocale();
    boolean useLeadingChar = ComponentOrientation.getOrientation(locale).isLeftToRight()
            && !locale.getLanguage().equals("zh"); //$NON-NLS-1$
    BreakIterator chars = BreakIterator.getCharacterInstance(locale);

    for (VLabel label : dayLabels) {
        String dayName = getFormattedDate("E", day.getTime()); //$NON-NLS-1$
        String shown = dayName;
        if (label.getData(CDT.Key.Compact, Boolean.class)) {
            chars.setText(dayName);
            int from;
            int to;
            if (useLeadingChar) {
                from = chars.first();
                to = chars.next();
            } else {
                to = chars.last();
                from = chars.previous();
            }
            shown = dayName.substring(from, to);
        }
        label.setText(shown);
        day.add(Calendar.DAY_OF_WEEK, 1);
    }
}

Source File: BaseUtilities.java From netbeans with Apache License 2.0

4 votes

/** Wrap multi-line strings (and get the individual lines).
    * <p>
    * Lines are broken only at boundaries reported by <code>breakIterator</code>;
    * when no boundary fits within <code>width</code>, the segment up to the
    * next boundary is emitted as a line even though it is longer.
    * @param original  the original string to wrap
    * @param width     the maximum width of lines; values below 1 are treated as 1
    * @param breakIterator breaks original to chars, words, sentences, depending on what instance you provide.
    * @param removeNewLines if <code>true</code>, any newlines in the original string are ignored
    * @return the lines after wrapping
    */
    public static String[] wrapStringToArray(
        String original, int width, BreakIterator breakIterator, boolean removeNewLines
    ) {
        // an empty input yields a single empty line
        if (original.length() == 0) {
            return new String[] { original };
        }

        // the input split into the lines that are wrapped individually below
        String[] workingSet;

        // substitute original newlines with spaces,
        // remove newlines from head and tail
        // NOTE(review): trimString is defined elsewhere; the head/tail
        // trimming is presumably done there - confirm
        if (removeNewLines) {
            original = trimString(original);
            original = original.replace('\n', ' ');
            workingSet = new String[] { original };
        } else {
            // keep the original line structure: one working line per "\n"-separated token
            StringTokenizer tokens = new StringTokenizer(original, "\n"); // NOI18N
            int len = tokens.countTokens();
            workingSet = new String[len];

            for (int i = 0; i < len; i++) {
                workingSet[i] = tokens.nextToken();
            }
        }

        // clamp the width so the wrapping loop below always makes progress
        if (width < 1) {
            width = 1;
        }

        // the whole text fits: no wrapping needed
        if (original.length() <= width) {
            return workingSet;
        }

        // Return early when every working line is shorter than width; the
        // labeled break leaves this block and falls through to the wrapping
        // code as soon as one line is not.
widthcheck:  {
            boolean ok = true;

            for (int i = 0; i < workingSet.length; i++) {
                ok = ok && (workingSet[i].length() < width);

                if (!ok) {
                    break widthcheck;
                }
            }

            return workingSet;
        }

        java.util.ArrayList<String> lines = new java.util.ArrayList<String>();

        // offset, within workingSet[i], of the start of the line currently
        // being built; reset to 0 after each wrapped working line
        int lineStart = 0;

        for (int i = 0; i < workingSet.length; i++) {
            if (workingSet[i].length() < width) {
                // short enough: taken over unchanged
                lines.add(workingSet[i]);
            } else {
                breakIterator.setText(workingSet[i]);

                // nextStart: boundary currently being examined;
                // prevStart: last boundary that still fits on the current
                // line, 0 meaning "none found yet"
                int nextStart = breakIterator.next();
                int prevStart = 0;

                do {
                    // advance while the next boundary still fits within width
                    while (((nextStart - lineStart) < width) && (nextStart != BreakIterator.DONE)) {
                        prevStart = nextStart;
                        nextStart = breakIterator.next();
                    }

                    // iterator exhausted: the rest of the working line becomes the last line
                    if (nextStart == BreakIterator.DONE) {
                        nextStart = prevStart = workingSet[i].length();
                    }

                    // no boundary fits: break at the next boundary anyway,
                    // which yields a line that is not shorter than width
                    if (prevStart == 0) {
                        prevStart = nextStart;
                    }

                    lines.add(workingSet[i].substring(lineStart, prevStart));

                    lineStart = prevStart;
                    prevStart = 0;
                } while (lineStart < workingSet[i].length());

                lineStart = 0;
            }
        }

        String[] s = new String[lines.size()];

        return lines.toArray(s);
    }

Source File: PlainText.java From gcs with Mozilla Public License 2.0

4 votes

/**
 * Break the paragraph into individual lines.
 * <p>
 * The text is split at the boundaries of a line {@code BreakIterator};
 * segments are added to the current line until the accumulated width
 * reaches {@code width}, at which point a new line is started. A single
 * trailing whitespace character of the segment that overflows is not
 * counted against the width.
 *
 * @param font the font used for rendering the text.
 * @param fontSize the fontSize used for rendering the text.
 * @param width the width of the box holding the content.
 * @return the individual lines.
 * @throws IOException if a width calculation fails
 */
List<Line> getLines(PDFont font, float fontSize, float width) throws IOException
{
    BreakIterator breaks = BreakIterator.getLineInstance();
    breaks.setText(textContent);

    final float scale = fontSize/FONTSCALE;

    List<Line> lines = new ArrayList<Line>();
    Line currentLine = new Line();
    float usedWidth = 0;

    int from = breaks.first();
    for (int to = breaks.next(); to != BreakIterator.DONE; from = to, to = breaks.next())
    {
        String word = textContent.substring(from, to);
        float wordWidth = font.getStringWidth(word) * scale;
        usedWidth += wordWidth;

        // check if the last word would fit without the whitespace ending it
        if (usedWidth >= width)
        {
            int lastIndex = word.length() - 1;
            if (Character.isWhitespace(word.charAt(lastIndex)))
            {
                float trailingWidth = font.getStringWidth(word.substring(lastIndex)) * scale;
                usedWidth -= trailingWidth;
            }
        }

        if (usedWidth >= width)
        {
            // close the current line; the overflowing word opens the next one
            currentLine.setWidth(currentLine.calculateWidth(font, fontSize));
            lines.add(currentLine);
            currentLine = new Line();
            usedWidth = font.getStringWidth(word) * scale;
        }

        AttributedString attributes = new AttributedString(word);
        attributes.addAttribute(TextAttribute.WIDTH, wordWidth);
        Word entry = new Word(word);
        entry.setAttributes(attributes);
        currentLine.addWord(entry);
    }

    // the last line is added even when it holds no words
    currentLine.setWidth(currentLine.calculateWidth(font, fontSize));
    lines.add(currentLine);
    return lines;
}

Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0

4 votes

/**
 * Bug 4638433
 * <p>
 * Checks the first boundaries that word and line iterators report for
 * short texts built from characters added or re-categorised since
 * Unicode 3.0.0.
 */
public void TestLineBreakBasedOnUnicode3_0_0()
{
    int boundary;

    /* Latin Extend-B characters
     * 0x0218-0x0233 which have been added since Unicode 3.0.0.
     */
    BreakIterator words = BreakIterator.getWordInstance(Locale.US);
    words.setText("\u0216\u0217\u0218\u0219\u021A");
    words.first();
    boundary = words.next();
    if (boundary != 5) {
        errln("Word break failure: failed to stop at 5 and bounded at " + boundary);
    }


    BreakIterator lines = BreakIterator.getLineInstance(Locale.US);

    /* <Three(Nd)><Two(Nd)><Low Double Prime Quotation Mark(Pe)><One(Nd)>
     * U+301F has changed its category from Ps to Pe since Unicode 2.1.
     */
    lines.setText("32\u301f1");
    lines.first();
    boundary = lines.next();
    if (boundary != 3) {
        errln("Line break failure: failed to skip before \\u301F(Pe) at 3 and bounded at " + boundary);
    }

    /* Mongolian <Letter A(Lo)><Todo Soft Hyphen(Pd)><Letter E(Lo)>
     * which have been added since Unicode 3.0.0.
     */
    lines.setText("\u1820\u1806\u1821");
    lines.first();
    boundary = lines.next();
    if (boundary != 2) {
        errln("Mongolian line break failure: failed to skip position before \\u1806(Pd) at 2 and bounded at " + boundary);
    }

    /* Khmer <ZERO(Nd)><Currency Symbol(Sc)><ONE(Nd)> which have
     * been added since Unicode 3.0.0.
     */
    lines.setText("\u17E0\u17DB\u17E1");
    lines.first();
    boundary = lines.next();
    if (boundary != 1) {
        errln("Khmer line break failure: failed to stop before \\u17DB(Sc) at 1 and bounded at " + boundary);
    }
    boundary = lines.next();
    if (boundary != 3) {
        errln("Khmer line break failure: failed to skip position after \\u17DB(Sc) at 3 and bounded at " + boundary);
    }

    /* Ogham <Letter UR(Lo)><Space Mark(Zs)><Letter OR(Lo)> which have
     * been added since Unicode 3.0.0.
     */
    lines.setText("\u1692\u1680\u1696");
    lines.first();
    boundary = lines.next();
    if (boundary != 2) {
        errln("Ogham line break failure: failed to skip postion before \\u1680(Zs) at 2 and bounded at " + boundary);
    }


    // Confirm changes in BreakIteratorRules_th.java have been reflected.
    BreakIterator thaiLines = BreakIterator.getLineInstance(new Locale("th", ""));

    /* Thai <Seven(Nd)>
     *      <Left Double Quotation Mark(Pi)>
     *      <Five(Nd)>
     *      <Right Double Quotation Mark(Pf)>
     *      <Three(Nd)>
     */
    thaiLines.setText("\u0E57\u201C\u0E55\u201D\u0E53");
    thaiLines.first();
    boundary = thaiLines.next();
    if (boundary != 1) {
        errln("Thai line break failure: failed to stop before \\u201C(Pi) at 1 and bounded at " + boundary);
    }
    boundary = thaiLines.next();
    if (boundary != 4) {
        errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + boundary);
    }
}

Source File: Utils.java From moa with GNU General Public License v3.0

4 votes

/**
  * Breaks up the string, if wider than "columns" characters.
  * <p>
  * The input is split at newlines and each piece is walked word by word:
  * once the line being built has reached "columns" characters it is
  * emitted before the next word is appended, so lines may be longer than
  * "columns". A single punctuation or space character that directly
  * follows is kept on the emitted line. Pieces that end up empty produce
  * no output line.
  *
  * @param s		the string to process
  * @param columns	the width in columns
  * @return		the processed string
  */
 public static String[] breakUp(String s, int columns) {
   final String punctuation = " .,;:!?'\"";
   Vector<String> result = new Vector<String>();

   for (String piece : s.split("\n")) {
     BreakIterator boundary = BreakIterator.getWordInstance();
     boundary.setText(piece);
     StringBuilder line = new StringBuilder();

     int from = boundary.first();
     for (int to = boundary.next(); to != BreakIterator.DONE; from = to, to = boundary.next()) {
       String word = piece.substring(from, to);
       if (line.length() >= columns) {
         // keep a lone trailing punctuation character on the full line
         boolean lonePunctuation =
             word.length() == 1 && punctuation.indexOf(word.charAt(0)) > -1;
         if (lonePunctuation) {
           line.append(word);
           word = "";
         }
         result.add(line.toString());
         line.setLength(0);
       }
       line.append(word);
     }

     if (line.length() > 0) {
       result.add(line.toString());
     }
   }

   return result.toArray(new String[result.size()]);
 }

Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0

4 votes

/**
 * Checks that {@code tb} reports a boundary at offset 2 of every
 * three-character text "[test char][break char][test char]" built from
 * {@code testChars} and the hard line-break characters. Combinations in
 * which the break character or the following character is a CONTROL or
 * FORMAT character are skipped. Gives up after 75 reported failures.
 */
private void doBreakInvariantTest(BreakIterator tb, String testChars)
{
    StringBuffer sample = new StringBuffer("aaa");
    int failures = 0;

    // a break should always occur after CR (unless followed by LF), LF, PS, and LS
    // (CR is currently left out of the list; change this back when new BI
    // code is added)
    final String hardBreaks = "\n\u2029\u2028";

    for (int b = 0; b < hardBreaks.length(); b++) {
        sample.setCharAt(1, hardBreaks.charAt(b));
        for (int lead = 0; lead < testChars.length(); lead++) {
            sample.setCharAt(0, testChars.charAt(lead));
            for (int trail = 0; trail < testChars.length(); trail++) {
                final char follower = testChars.charAt(trail);
                final char breaker = sample.charAt(1);

                // if a cr is followed by lf, don't do the check (they stay together)
                if (breaker == '\r' && follower == '\n') {
                    continue;
                }

                // CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored
                // for breaking purposes as per UTR14
                final int breakerType = Character.getType(breaker);
                final int followerType = Character.getType(follower);
                final boolean ignorable =
                        breakerType == Character.CONTROL || breakerType == Character.FORMAT
                        || followerType == Character.CONTROL || followerType == Character.FORMAT;
                if (ignorable) {
                    continue;
                }

                sample.setCharAt(2, follower);
                tb.setText(sample.toString());

                // walk all boundaries; the iterator is always run to the end
                boolean sawBreakAt2 = false;
                int pos = tb.first();
                while (pos != BreakIterator.DONE) {
                    if (pos == 2) {
                        sawBreakAt2 = true;
                    }
                    pos = tb.next();
                }
                if (sawBreakAt2) {
                    continue;
                }

                errln("No break between U+" + Integer.toHexString((int)(sample.charAt(1)))
                            + " and U+" + Integer.toHexString((int)(sample.charAt(2))));
                failures++;
                if (failures >= 75) {
                    return;
                }
            }
        }
    }
}

Source File: BreakIteratorTest.java From openjdk-jdk9 with GNU General Public License v2.0

4 votes

/**
 * Checks two "never break here" invariants of {@code tb} using texts built
 * from {@code testChars}: no boundary between CR and LF, and no boundary
 * before a non-spacing or enclosing mark unless the preceding character is
 * one of the skipped line terminators. Gives up after 75 reported failures
 * in total.
 */
private void doOtherInvariantTest(BreakIterator tb, String testChars)
{
    StringBuffer sample = new StringBuffer("a\r\na");
    int failures = 0;

    // a break should never occur between CR and LF
    for (int lead = 0; lead < testChars.length(); lead++) {
        sample.setCharAt(0, testChars.charAt(lead));
        for (int trail = 0; trail < testChars.length(); trail++) {
            sample.setCharAt(3, testChars.charAt(trail));
            tb.setText(sample.toString());
            int pos = tb.first();
            while (pos != BreakIterator.DONE) {
                if (pos == 2) {
                    errln("Break between CR and LF in string U+" + Integer.toHexString(
                            (int)(sample.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
                            (int)(sample.charAt(3))));
                    failures++;
                    if (failures >= 75) {
                        return;
                    }
                }
                pos = tb.next();
            }
        }
    }

    // a break should never occur before a non-spacing mark, unless it's preceded
    // by a line terminator
    sample.setLength(0);
    sample.append("aaaa");
    for (int lead = 0; lead < testChars.length(); lead++) {
        final char base = testChars.charAt(lead);
        if (base == '\n' || base == '\r' || base == '\u2029' || base == '\u2028'
                || base == '\u0003') {
            continue;
        }
        sample.setCharAt(1, base);
        for (int trail = 0; trail < testChars.length(); trail++) {
            final char mark = testChars.charAt(trail);
            final int markType = Character.getType(mark);
            if (markType != Character.NON_SPACING_MARK
                    && markType != Character.ENCLOSING_MARK) {
                continue;
            }
            sample.setCharAt(2, mark);

            // CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored
            // for breaking purposes as per UTR14
            final int type1 = Character.getType(sample.charAt(1));
            final int type2 = Character.getType(sample.charAt(2));
            final boolean ignorable =
                    type1 == Character.CONTROL || type1 == Character.FORMAT
                    || type2 == Character.CONTROL || type2 == Character.FORMAT;
            if (ignorable) {
                continue;
            }

            tb.setText(sample.toString());
            int pos = tb.first();
            while (pos != BreakIterator.DONE) {
                if (pos == 2) {
                    errln("Break between U+" + Integer.toHexString((int)(sample.charAt(1)))
                            + " and U+" + Integer.toHexString((int)(sample.charAt(2))));
                    failures++;
                    if (failures >= 75) {
                        return;
                    }
                }
                pos = tb.next();
            }
        }
    }
}

Source File: StemmingLemaEx.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

4 votes

/**
 * Demo entry point: prints the Porter stem of a few words, then splits a
 * sample paragraph into sentences four ways - {@code String.split}, a
 * commented regular expression, a sentence {@code BreakIterator}, and an
 * OpenNLP {@code SentenceDetectorME}.
 * <p>
 * The OpenNLP model is read from a hard-coded local path; if it cannot be
 * read, the error is printed and the method returns normally.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args){
    String[] words = {"bank", "banking", "banks", "banker", "banked",
"bankart"};
    PorterStemmer ps = new PorterStemmer();
    for(String w : words){
        String stem = ps.stem(w);
        System.out.println("Word : " + w + " Stem : " + stem);
    }
    String paragraph = "When determining the end of sentences "
        + "we need to consider several factors. Sentences may end with "
        + "exclamation marks! Or possibly questions marks? Within "
        + "sentences we may find numbers like 3.14159, abbreviations "
        + "such as found in Mr. Smith, and possibly ellipses either "
        + "within a sentence …, or at the end of a sentence…";
    String simple = "[.?!]";
    String[] splitString = (paragraph.split(simple));
    for (String string : splitString) {
        System.out.println(string);
    }
    System.out.println("-------------Using Pattern and Matcher-------------");
    Pattern sentencePattern = Pattern.compile(
        "# Match a sentence ending in punctuation or EOS.\n"
        + "[^.!?\\s]    # First char is non-punct, non-ws\n"
        + "[^.!?]*      # Greedily consume up to punctuation.\n"
        + "(?:          # Group for unrolling the loop.\n"
        + "  [.!?]      # (special) inner punctuation ok if\n"
        + "  (?!['\"]?\\s|$)  # not followed by ws or EOS.\n"
        + "  [^.!?]*    # Greedily consume up to punctuation.\n"
        + ")*           # Zero or more (special normal*)\n"
        + "[.!?]?       # Optional ending punctuation.\n"
        + "['\"]?       # Optional closing quote.\n"
        + "(?=\\s|$)",
        Pattern.MULTILINE | Pattern.COMMENTS);
    Matcher matcher = sentencePattern.matcher(paragraph);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }
    System.out.println("-------------Using BreakIterator-------------");
    BreakIterator si = BreakIterator.getSentenceInstance();
    si.setText(paragraph);
    int boundary = si.first();
    while(boundary!=BreakIterator.DONE){
        int begin = boundary;
        System.out.println(boundary + " - ");
        boundary = si.next();
        int end = boundary;
        // the last boundary has no following sentence to print
        if(end == BreakIterator.DONE){
            break;
        }
        System.out.println(boundary + " [ " + paragraph.substring(begin,end) + " ] ");
    }
    System.out.println("-------------Using SentenceDetectorME-------------");
    // try-with-resources closes the model stream on every path, including
    // when loading the model fails
    try (InputStream is = new FileInputStream(new File("/home/ashish/Downloads/" + "en-sent.bin"))) {
        SentenceModel sm = new SentenceModel(is);
        SentenceDetectorME detector = new SentenceDetectorME(sm);
        String[] sentences = detector.sentDetect(paragraph);
        for(String s : sentences){
            System.out.println(s);
        }
    }
    catch(IOException e){
        System.out.println("Error Detected" + e);
        e.printStackTrace();
    }
}

Java Code Examples for java.text.BreakIterator#next()