java.text.BreakIterator#setText

Source File: MyFormatter.java From triplea with GNU General Public License v3.0

6 votes

/**
 * Wraps a string for HTML display (for example a long tooltip) by inserting a
 * {@code <br />} followed by ten {@code &nbsp;} entities at line-break opportunities.
 *
 * <pre>
 * string part 1
 *           string part 2
 *           ...
 *           string part X
 * </pre>
 *
 * <p>The text is walked chunk by chunk using a line {@link BreakIterator}. A running column count
 * is compared against {@code firstLineMaxLength} until the first break is emitted and against
 * {@code maxLength} afterwards. Note that each indent is counted as 5 columns even though ten
 * {@code &nbsp;} entities are written.
 *
 * @param target the text to wrap
 * @param firstLineMaxLength column limit applied before the first inserted break
 * @param maxLength column limit applied after the first inserted break
 * @return {@code target} with break-and-indent markup inserted between chunks
 */
public static String addHtmlBreaksAndIndents(
    final String target, final int firstLineMaxLength, final int maxLength) {
  final StringBuilder html = new StringBuilder();
  final BreakIterator lineBreaks = BreakIterator.getLineInstance();
  lineBreaks.setText(target);
  int limit = firstLineMaxLength;
  int column = 0;
  int from = lineBreaks.first();
  for (int to = lineBreaks.next(); to != BreakIterator.DONE; from = to, to = lineBreaks.next()) {
    final int chunkLength = to - from;
    column += chunkLength;
    if (column >= limit) {
      // The chunk would reach the limit: start a new, indented line before writing it.
      html.append("<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
      column = chunkLength + 5; // the indent is charged as 5 columns
      limit = maxLength;
    }
    html.append(target, from, to);
  }
  return html.toString();
}

Source File: Chapter2.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Prints every word-boundary segment of a fixed sample sentence to standard output.
 *
 * <p>Each output line has the form {@code begin-end [segment]}. After the last segment the
 * final boundary index followed by {@code -} is printed without a trailing newline, because the
 * start index is written before the iterator reports {@link BreakIterator#DONE}.
 */
private static void usingTheBreakIterator() {
    // Pass the locale to the iterator explicitly so the boundaries do not depend on the
    // JVM's default locale.
    Locale currentLocale = Locale.US;
    BreakIterator wordIterator = BreakIterator.getWordInstance(currentLocale);
    String text = "Let's pause, and then reflect.";
    wordIterator.setText(text);
    int boundary = wordIterator.first();
    while (boundary != BreakIterator.DONE) {
        int begin = boundary;
        System.out.print(boundary + "-");
        boundary = wordIterator.next();
        int end = boundary;
        if (end == BreakIterator.DONE) {
            break;
        }
        System.out.println(boundary + " [" + text.substring(begin, end) + "]");
    }
}

Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0

6 votes

/**
 * Checks that {@code isBoundary} rejects offsets before the begin index of the iterated text.
 *
 * <p>The iterator is given the sub-range {@code [3, length - 3)} of the sample string. For
 * every offset from -1 up to and including the begin index, an
 * {@link IllegalArgumentException} is expected when the offset is below the begin index and
 * must not occur otherwise; violations are reported through {@code errln}.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only the exception behavior matters here; the boolean result is irrelevant.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
        catch (IllegalArgumentException e) {
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
    }
}

Source File: BreakIteratorTest.java From openjdk-jdk8u with GNU General Public License v2.0

6 votes

/**
 * Checks that {@code isBoundary} rejects offsets before the begin index of the iterated text.
 *
 * <p>The iterator is given the sub-range {@code [3, length - 3)} of the sample string. For
 * every offset from -1 up to and including the begin index, an
 * {@link IllegalArgumentException} is expected when the offset is below the begin index and
 * must not occur otherwise; violations are reported through {@code errln}.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only the exception behavior matters here; the boolean result is irrelevant.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
        catch (IllegalArgumentException e) {
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
    }
}

Source File: CapitalizeWordsInSentence.java From levelup-java-examples with Apache License 2.0

5 votes

/**
 * Finds the start of the next letter-containing segment after {@code pos}.
 *
 * <p>Scanning begins at the first word boundary following {@code pos}. Each segment between
 * consecutive boundaries is inspected, and the start of the first one that contains a letter
 * is returned.
 *
 * @param pos offset after which to start looking
 * @param text the text to scan
 * @return the start offset of the matching segment, or {@link BreakIterator#DONE} if none
 */
private static int nextWordStartAfter(int pos, String text) {
    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(text);
    int segmentStart = words.following(pos);
    for (int segmentEnd = words.next();
            segmentEnd != BreakIterator.DONE;
            segmentStart = segmentEnd, segmentEnd = words.next()) {
        // Advance through the segment until a letter is hit or the segment is exhausted.
        int probe = segmentStart;
        while (probe < segmentEnd && !Character.isLetter(text.codePointAt(probe))) {
            probe++;
        }
        if (probe < segmentEnd) {
            return segmentStart;
        }
    }
    return BreakIterator.DONE;
}

Source File: AccessibleHTML.java From openjdk-8 with GNU General Public License v2.0

5 votes

/**
 * Narrows the text obtained from <code>getParagraphElementText(index)</code>
 * to the word or sentence (selected by <code>part</code>) that is
 * delimited by the break following <code>index</code> and the break
 * preceding that one. On success the same segment object is returned
 * with <code>offset</code> set to the start of the word/sentence in the
 * array, <code>count</code> set to its length, and
 * <code>modelOffset</code> shifted by the same amount as
 * <code>offset</code>. Returns null when <code>part</code> is neither
 * <code>AccessibleText.WORD</code> nor
 * <code>AccessibleText.SENTENCE</code>, when no text is available, or
 * when no usable pair of breaks lies inside the segment.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment text = getParagraphElementText(index);
    if (text == null) {
        return null;
    }
    BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }
    text.first();
    breaker.setText(text);

    // Translate the model index into an array index before asking for the next break.
    int end = breaker.following(index - text.modelOffset + text.offset);
    if (end == BreakIterator.DONE || end > text.offset + text.count) {
        return null;
    }
    int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= text.offset + text.count) {
        return null;
    }
    // modelOffset must be adjusted while text.offset still holds its old value.
    text.modelOffset = text.modelOffset + begin - text.offset;
    text.count = end - begin;
    text.offset = begin;
    return text;
}

Source File: AccessibleHTML.java From jdk8u-dev-jdk with GNU General Public License v2.0

5 votes

/**
 * Narrows the text obtained from <code>getParagraphElementText(index)</code>
 * to the word or sentence (selected by <code>part</code>) that is
 * delimited by the break following <code>index</code> and the break
 * preceding that one. On success the same segment object is returned
 * with <code>offset</code> set to the start of the word/sentence in the
 * array, <code>count</code> set to its length, and
 * <code>modelOffset</code> shifted by the same amount as
 * <code>offset</code>. Returns null when <code>part</code> is neither
 * <code>AccessibleText.WORD</code> nor
 * <code>AccessibleText.SENTENCE</code>, when no text is available, or
 * when no usable pair of breaks lies inside the segment.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment text = getParagraphElementText(index);
    if (text == null) {
        return null;
    }
    BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }
    text.first();
    breaker.setText(text);

    // Translate the model index into an array index before asking for the next break.
    int end = breaker.following(index - text.modelOffset + text.offset);
    if (end == BreakIterator.DONE || end > text.offset + text.count) {
        return null;
    }
    int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= text.offset + text.count) {
        return null;
    }
    // modelOffset must be adjusted while text.offset still holds its old value.
    text.modelOffset = text.modelOffset + begin - text.offset;
    text.count = end - begin;
    text.offset = begin;
    return text;
}

Source File: CaretSelectionBindImpl.java From RichTextFX with BSD 2-Clause "Simplified" License

5 votes

/**
 * Recomputes the end position using the supplied break iterator.
 *
 * <p>Does nothing when {@code getAreaLength()} is zero. Otherwise the iterator is pointed at
 * the area's text, a position is obtained from
 * {@code calculatePositionViaBreakingForwards} starting at {@code getStartPosition()}, and
 * that position is handed to {@code updateEndTo}.
 *
 * @param numOfBreaks value forwarded to {@code calculatePositionViaBreakingForwards}
 * @param breakIterator iterator whose text is replaced with the area's text
 */
@Override
public void updateEndByBreaksForward(int numOfBreaks, BreakIterator breakIterator) {
    boolean areaIsEmpty = getAreaLength() == 0;
    if (!areaIsEmpty) {
        breakIterator.setText(getArea().getText());
        int newEnd = calculatePositionViaBreakingForwards(numOfBreaks, breakIterator, getStartPosition());
        updateEndTo(newEnd);
    }
}

Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src the string being examined
 * @param index offset in <code>src</code> of the code point C
 * @param locale locale used to obtain the word break iterator
 * @return true if a cased code point is found scanning backwards from
 *         <code>index</code> before a word boundary is reached, and no cased
 *         code point is found scanning forwards from the code point after
 *         <code>index</code> before a word boundary or the end of
 *         <code>src</code>; false otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Holds the code point most recently read; its char count drives both loop steps.
    int ch;

    // Look for a preceding 'cased' letter
    // The scan stops as soon as i sits on a word boundary, so if index itself is a
    // boundary the loop body never runs and the method returns false.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        // isCased is defined outside this block.
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // The inner loop reuses i and ch; that is safe because every path from
            // here returns, so the outer loop never resumes.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // Reached a word boundary (or ran out of text) without seeing a preceding cased letter.
    return false;
}

Source File: Bug4912404.java From dragonwell8_jdk with GNU General Public License v2.0

5 votes

/**
 * Verifies that a word {@link BreakIterator} reports itself as not equal to {@code null}.
 *
 * @param args unused
 * @throws RuntimeException if {@code equals(null)} returns true
 */
public static void main(String[] args) {
    BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText("abc");
    boolean equalsNull = wordIterator.equals(null);
    if (!equalsNull) {
        return;
    }
    throw new RuntimeException("BreakIterator.equals(null) should return false.");
}

Source File: AccessibleHTML.java From dragonwell8_jdk with GNU General Public License v2.0

5 votes

/**
 * Narrows the text obtained from <code>getParagraphElementText(index)</code>
 * to the word or sentence (selected by <code>part</code>) that is
 * delimited by the break following <code>index</code> and the break
 * preceding that one. On success the same segment object is returned
 * with <code>offset</code> set to the start of the word/sentence in the
 * array, <code>count</code> set to its length, and
 * <code>modelOffset</code> shifted by the same amount as
 * <code>offset</code>. Returns null when <code>part</code> is neither
 * <code>AccessibleText.WORD</code> nor
 * <code>AccessibleText.SENTENCE</code>, when no text is available, or
 * when no usable pair of breaks lies inside the segment.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment text = getParagraphElementText(index);
    if (text == null) {
        return null;
    }
    BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }
    text.first();
    breaker.setText(text);

    // Translate the model index into an array index before asking for the next break.
    int end = breaker.following(index - text.modelOffset + text.offset);
    if (end == BreakIterator.DONE || end > text.offset + text.count) {
        return null;
    }
    int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= text.offset + text.count) {
        return null;
    }
    // modelOffset must be adjusted while text.offset still holds its old value.
    text.modelOffset = text.modelOffset + begin - text.offset;
    text.count = end - begin;
    text.offset = begin;
    return text;
}

Source File: AbstractWordAwareDoubleClickStrategy.java From xtext-eclipse with Eclipse Public License 2.0

5 votes

/**
 * Computes the region between the break positions surrounding {@code offset}.
 *
 * <p>Returns {@code null} when {@code offset} equals the end of its line, when the computed
 * region is empty, or when the document reports a {@code BadLocationException}. If no break
 * is found before (after) the offset, the line start (line end) is used instead. When the
 * offset itself is a break position, the longer of the two neighbouring spans is chosen; on
 * a tie the preceding span wins.
 *
 * @param document the document to inspect
 * @param offset the offset inside {@code document}
 * @return the selected region, or {@code null} as described above
 */
@Override
protected IRegion findWord(IDocument document, int offset) {
	try {
		IRegion line = document.getLineInformationOfOffset(offset);
		int lineEnd = line.getOffset() + line.getLength();
		if (offset == lineEnd) {
			return null;
		}

		BreakIterator breakIter = createBreakIterator();
		breakIter.setText(new DocumentCharacterIterator(document));

		int start = breakIter.preceding(offset);
		if (start == BreakIterator.DONE) {
			start = line.getOffset();
		}
		int end = breakIter.following(offset);
		if (end == BreakIterator.DONE) {
			end = lineEnd;
		}

		if (breakIter.isBoundary(offset)) {
			// Sitting exactly on a break: keep whichever side is longer.
			boolean followingIsLonger = end - offset > offset - start;
			if (followingIsLonger) {
				start = offset;
			} else {
				end = offset;
			}
		}

		return end == start ? null : new Region(start, end - start);
	} catch (BadLocationException e) {
		return null;
	}
}

Source File: ConditionalSpecialCasing.java From jdk-1.7-annotated with Apache License 2.0

5 votes

/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src the string being examined
 * @param index offset in <code>src</code> of the code point C
 * @param locale locale used to obtain the word break iterator
 * @return true if a cased code point is found scanning backwards from
 *         <code>index</code> before a word boundary is reached, and no cased
 *         code point is found scanning forwards from the code point after
 *         <code>index</code> before a word boundary or the end of
 *         <code>src</code>; false otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Holds the code point most recently read; its char count drives both loop steps.
    int ch;

    // Look for a preceding 'cased' letter
    // The scan stops as soon as i sits on a word boundary, so if index itself is a
    // boundary the loop body never runs and the method returns false.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        // isCased is defined outside this block.
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // The inner loop reuses i and ch; that is safe because every path from
            // here returns, so the outer loop never resumes.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // Reached a word boundary (or ran out of text) without seeing a preceding cased letter.
    return false;
}

Source File: AccessibleHTML.java From jdk1.8-source-analysis with Apache License 2.0

5 votes

/**
 * Narrows the text obtained from <code>getParagraphElementText(index)</code>
 * to the word or sentence (selected by <code>part</code>) that is
 * delimited by the break following <code>index</code> and the break
 * preceding that one. On success the same segment object is returned
 * with <code>offset</code> set to the start of the word/sentence in the
 * array, <code>count</code> set to its length, and
 * <code>modelOffset</code> shifted by the same amount as
 * <code>offset</code>. Returns null when <code>part</code> is neither
 * <code>AccessibleText.WORD</code> nor
 * <code>AccessibleText.SENTENCE</code>, when no text is available, or
 * when no usable pair of breaks lies inside the segment.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment text = getParagraphElementText(index);
    if (text == null) {
        return null;
    }
    BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }
    text.first();
    breaker.setText(text);

    // Translate the model index into an array index before asking for the next break.
    int end = breaker.following(index - text.modelOffset + text.offset);
    if (end == BreakIterator.DONE || end > text.offset + text.count) {
        return null;
    }
    int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= text.offset + text.count) {
        return null;
    }
    // modelOffset must be adjusted while text.offset still holds its old value.
    text.modelOffset = text.modelOffset + begin - text.offset;
    text.count = end - begin;
    text.offset = begin;
    return text;
}

Source File: TestOpenNLPSentenceBreakIterator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs the single-sentence check on an iterator whose text is the first sample sentence
 * followed by {@code PADDING}, with the iterated range ending at the sentence's length.
 */
public void testSliceEnd() throws Exception {
  NLPSentenceDetectorOp detector = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
  BreakIterator sentenceBreaks = new OpenNLPSentenceBreakIterator(detector);
  String sentence = SENTENCES[0];
  // The padding lies beyond the end index passed to getCharArrayIterator.
  sentenceBreaks.setText(getCharArrayIterator(sentence + PADDING, 0, sentence.length()));

  test1Sentence(sentenceBreaks, sentence);
}

Source File: LogWriterImpl.java From gemfirexd-oss with Apache License 2.0

4 votes

/**
 * Writes {@code target} to {@code writer} chunk by chunk, starting a new line indented by
 * two spaces after every chunk that ends in a line terminator.
 *
 * <p>Chunks come from a line {@link BreakIterator}, but a break is only accepted when the
 * chunk ends in whitespace (or the text ends). A trailing {@code \n}, together with a
 * {@code \r} directly before it, is not printed because {@code println} supplies the line
 * end. A running column count starts at {@code initialLength}; a final {@code println} is
 * emitted unless that count is zero.
 *
 * @param writer destination for the formatted text
 * @param target the text to format
 * @param initialLength number of columns considered already used on the current line
 */
static void formatText(PrintWriter writer, String target,
                         int initialLength) {
    BreakIterator lineBreaks = BreakIterator.getLineInstance();
    lineBreaks.setText(target);
    int column = initialLength;
    int chunkStart = lineBreaks.first();
    int chunkEnd = lineBreaks.next();

    while (chunkEnd != BreakIterator.DONE) {
      // Extend the chunk until it ends in whitespace or the iterator is exhausted.
      char last = target.charAt(chunkEnd - 1);
      while (!Character.isWhitespace(last)) {
        int candidate = lineBreaks.next();
        if (candidate == BreakIterator.DONE) {
          // End of the string: keep the chunk end found so far.
          break;
        }
        chunkEnd = candidate;
        last = target.charAt(chunkEnd - 1);
      }

      boolean endsWithNewline = last == '\n';
      int printEnd = chunkEnd;
      if (endsWithNewline) {
        // Drop the \n (and a \r right before it); println writes the line end.
        printEnd--;
        if (printEnd > 0 && target.charAt(printEnd - 1) == '\r') {
          printEnd--;
        }
      } else if (last == '\t') {
        // A tab is counted as 8 columns: 7 here plus 1 for the character itself.
        column += 7;
      }

      writer.print(target.substring(chunkStart, printEnd));
      column += printEnd - chunkStart;

      if (endsWithNewline || last == '\r') {
        // Forced line end; continuation lines are indented by two spaces.
        writer.println();
        writer.print("  ");
        column = 2;
      }
      chunkStart = chunkEnd;
      chunkEnd = lineBreaks.next();
    }
    if (column != 0) {
      writer.println();
    }
}

Source File: WordCountService.java From mojito with Apache License 2.0

4 votes

/**
 * Counts the words in a string, treating the string as English text.
 *
 * A segment produced by the English word break iterator is counted as a
 * word when its first character is a letter or a digit. Placeholders get
 * no special treatment and are counted like any other word; excluding
 * them (for example with an Okapi step) is left for later.
 *
 * @param string the text to count words in
 * @return the number of words
 */
public int getEnglishWordCount(String string) {

    BreakIterator boundaries = BreakIterator.getWordInstance(Locale.ENGLISH);
    boundaries.setText(string);

    int words = 0;
    int segmentStart = boundaries.first();

    for (int segmentEnd = boundaries.next();
            segmentEnd != BreakIterator.DONE;
            segmentEnd = boundaries.next()) {

        // Punctuation and whitespace segments start with a non-alphanumeric character.
        char leading = string.charAt(segmentStart);
        if (Character.isLetterOrDigit(leading)) {
            words++;
        }
        segmentStart = segmentEnd;
    }

    return words;
}

Source File: GlyphView.java From dragonwell8_jdk with GNU General Public License v2.0

4 votes

/**
 * Picks a break location for the region (p0, p1].
 *
 * The break locations of the whole view are computed on first use and
 * kept in <code>breakSpots</code>, ordered from the highest offset down.
 * The first cached location that is not greater than <code>p1</code> is
 * the candidate; it is returned if it is greater than <code>p0</code>,
 * otherwise BreakIterator.DONE is returned.
 */
private int getBreakSpot(int p0, int p1) {
    if (breakSpots == null) {
        // Build the cache of break locations for the entire view.
        int viewStart = getStartOffset();
        int viewEnd = getEndOffset();
        int[] found = new int[viewEnd + 1 - viewStart];
        int foundCount = 0;

        // The breaker runs over the parent element's text because a valid
        // break may sit right at the end edge of this view (space, etc.)
        Element parent = getElement().getParentElement();
        int parentStart;
        int parentEnd;
        if (parent == null) {
            parentStart = viewStart;
            parentEnd = viewEnd;
        } else {
            parentStart = parent.getStartOffset();
            parentEnd = parent.getEndOffset();
        }

        Segment text = getText(parentStart, parentEnd);
        text.first();
        BreakIterator breaker = getBreaker();
        breaker.setText(text);

        // Search backwards from viewEnd+1 when the parent extends past the
        // view, otherwise from viewEnd itself.
        int cursor = (parentEnd > viewEnd) ? viewEnd + 1 : viewEnd;
        while (true) {
            // Convert model offset -> segment index for the breaker, then back.
            cursor = breaker.preceding(text.offset + (cursor - parentStart))
                   + (parentStart - text.offset);
            if (cursor <= viewStart) {
                break;
            }
            // The break spot is within the view
            found[foundCount++] = cursor;
        }

        SegmentCache.releaseSharedSegment(text);
        breakSpots = new int[foundCount];
        System.arraycopy(found, 0, breakSpots, 0, foundCount);
    }

    for (int candidate : breakSpots) {
        if (candidate <= p1) {
            // Only the first spot at or below p1 is considered.
            return (candidate > p0) ? candidate : BreakIterator.DONE;
        }
    }
    return BreakIterator.DONE;
}

Source File: GlyphView.java From openjdk-jdk9 with GNU General Public License v2.0

4 votes

/**
 * Picks a break location for the region (p0, p1].
 *
 * The break locations of the whole view are computed on first use and
 * kept in <code>breakSpots</code>, ordered from the highest offset down.
 * The first cached location that is not greater than <code>p1</code> is
 * the candidate; it is returned if it is greater than <code>p0</code>,
 * otherwise BreakIterator.DONE is returned.
 */
private int getBreakSpot(int p0, int p1) {
    if (breakSpots == null) {
        // Build the cache of break locations for the entire view.
        int viewStart = getStartOffset();
        int viewEnd = getEndOffset();
        int[] found = new int[viewEnd + 1 - viewStart];
        int foundCount = 0;

        // The breaker runs over the parent element's text because a valid
        // break may sit right at the end edge of this view (space, etc.)
        Element parent = getElement().getParentElement();
        int parentStart;
        int parentEnd;
        if (parent == null) {
            parentStart = viewStart;
            parentEnd = viewEnd;
        } else {
            parentStart = parent.getStartOffset();
            parentEnd = parent.getEndOffset();
        }

        Segment text = getText(parentStart, parentEnd);
        text.first();
        BreakIterator breaker = getBreaker();
        breaker.setText(text);

        // Search backwards from viewEnd+1 when the parent extends past the
        // view, otherwise from viewEnd itself.
        int cursor = (parentEnd > viewEnd) ? viewEnd + 1 : viewEnd;
        while (true) {
            // Convert model offset -> segment index for the breaker, then back.
            cursor = breaker.preceding(text.offset + (cursor - parentStart))
                   + (parentStart - text.offset);
            if (cursor <= viewStart) {
                break;
            }
            // The break spot is within the view
            found[foundCount++] = cursor;
        }

        SegmentCache.releaseSharedSegment(text);
        breakSpots = new int[foundCount];
        System.arraycopy(found, 0, breakSpots, 0, foundCount);
    }

    for (int candidate : breakSpots) {
        if (candidate <= p1) {
            // Only the first spot at or below p1 is considered.
            return (candidate > p0) ? candidate : BreakIterator.DONE;
        }
    }
    return BreakIterator.DONE;
}

Source File: GlyphView.java From jdk8u-dev-jdk with GNU General Public License v2.0

4 votes

/**
 * Picks a break location for the region (p0, p1].
 *
 * The break locations of the whole view are computed on first use and
 * kept in <code>breakSpots</code>, ordered from the highest offset down.
 * The first cached location that is not greater than <code>p1</code> is
 * the candidate; it is returned if it is greater than <code>p0</code>,
 * otherwise BreakIterator.DONE is returned.
 */
private int getBreakSpot(int p0, int p1) {
    if (breakSpots == null) {
        // Build the cache of break locations for the entire view.
        int viewStart = getStartOffset();
        int viewEnd = getEndOffset();
        int[] found = new int[viewEnd + 1 - viewStart];
        int foundCount = 0;

        // The breaker runs over the parent element's text because a valid
        // break may sit right at the end edge of this view (space, etc.)
        Element parent = getElement().getParentElement();
        int parentStart;
        int parentEnd;
        if (parent == null) {
            parentStart = viewStart;
            parentEnd = viewEnd;
        } else {
            parentStart = parent.getStartOffset();
            parentEnd = parent.getEndOffset();
        }

        Segment text = getText(parentStart, parentEnd);
        text.first();
        BreakIterator breaker = getBreaker();
        breaker.setText(text);

        // Search backwards from viewEnd+1 when the parent extends past the
        // view, otherwise from viewEnd itself.
        int cursor = (parentEnd > viewEnd) ? viewEnd + 1 : viewEnd;
        while (true) {
            // Convert model offset -> segment index for the breaker, then back.
            cursor = breaker.preceding(text.offset + (cursor - parentStart))
                   + (parentStart - text.offset);
            if (cursor <= viewStart) {
                break;
            }
            // The break spot is within the view
            found[foundCount++] = cursor;
        }

        SegmentCache.releaseSharedSegment(text);
        breakSpots = new int[foundCount];
        System.arraycopy(found, 0, breakSpots, 0, foundCount);
    }

    for (int candidate : breakSpots) {
        if (candidate <= p1) {
            // Only the first spot at or below p1 is considered.
            return (candidate > p0) ? candidate : BreakIterator.DONE;
        }
    }
    return BreakIterator.DONE;
}

Java Code Examples for java.text.BreakIterator#setText()