java.text.BreakIterator#isBoundary

Source File: BreakIteratorTest.java From jdk8u_jdk with GNU General Public License v2.0

6 votes

/**
 * Checks {@code bi.isBoundary(offset)} for every offset from 0 through
 * {@code text.length()} (inclusive) against the expected boundary positions.
 * Each result is logged with {@code logln}; each mismatch is reported with
 * {@code errln}.
 *
 * @param bi         the break iterator under test
 * @param text       the text whose length bounds the offsets that are probed
 * @param boundaries expected boundary offsets in ascending order, consumed
 *                   starting at index 1 (presumably index 0 holds a sentinel
 *                   -- confirm against the caller)
 */
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    logln("testIsBoundary():");
    final int lastOffset = text.length();
    int expectedIdx = 1;    // position in boundaries of the next expected break
    for (int offset = 0; offset <= lastOffset; offset++) {
        final boolean actual = bi.isBoundary(offset);
        logln("bi.isBoundary(" + offset + ") -> " + actual);

        if (offset == boundaries[expectedIdx]) {
            // This offset is an expected boundary: move on to the next one.
            ++expectedIdx;
            if (!actual) {
                errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
            }
        } else if (actual) {
            errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
        }
    }
}

Source File: BreakIteratorTest.java From openjdk-jdk9 with GNU General Public License v2.0

6 votes

/**
 * Regression test (named for bug 4153072): when the iterator's text is a
 * sub-range of a string, {@code isBoundary()} must throw
 * {@link IllegalArgumentException} for offsets before the range's begin index
 * and must not throw for offsets at or after it.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    // Iterate only over the sub-range [begin, end) of str, starting at begin.
    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only whether the call throws matters; the returned value is ignored.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
            }
        } catch (IllegalArgumentException e) {
            // Expected for offsets below begin; an error for anything else.
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
    }
}

Source File: BreakIteratorTest.java From jdk8u_jdk with GNU General Public License v2.0

6 votes

/**
 * Regression test (named for bug 4153072): when the iterator's text is a
 * sub-range of a string, {@code isBoundary()} must throw
 * {@link IllegalArgumentException} for offsets before the range's begin index
 * and must not throw for offsets at or after it.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    // Iterate only over the sub-range [begin, end) of str, starting at begin.
    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only whether the call throws matters; the returned value is ignored.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
            }
        } catch (IllegalArgumentException e) {
            // Expected for offsets below begin; an error for anything else.
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
    }
}

Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0

6 votes

/**
 * Checks {@code bi.isBoundary(offset)} for every offset from 0 through
 * {@code text.length()} (inclusive) against the expected boundary positions.
 * Each result is logged with {@code logln}; each mismatch is reported with
 * {@code errln}.
 *
 * @param bi         the break iterator under test
 * @param text       the text whose length bounds the offsets that are probed
 * @param boundaries expected boundary offsets in ascending order, consumed
 *                   starting at index 1 (presumably index 0 holds a sentinel
 *                   -- confirm against the caller)
 */
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    logln("testIsBoundary():");
    final int lastOffset = text.length();
    int expectedIdx = 1;    // position in boundaries of the next expected break
    for (int offset = 0; offset <= lastOffset; offset++) {
        final boolean actual = bi.isBoundary(offset);
        logln("bi.isBoundary(" + offset + ") -> " + actual);

        if (offset == boundaries[expectedIdx]) {
            // This offset is an expected boundary: move on to the next one.
            ++expectedIdx;
            if (!actual) {
                errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
            }
        } else if (actual) {
            errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
        }
    }
}

Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0

6 votes

/**
 * Regression test (named for bug 4153072): when the iterator's text is a
 * sub-range of a string, {@code isBoundary()} must throw
 * {@link IllegalArgumentException} for offsets before the range's begin index
 * and must not throw for offsets at or after it.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    // Iterate only over the sub-range [begin, end) of str, starting at begin.
    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only whether the call throws matters; the returned value is ignored.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
            }
        } catch (IllegalArgumentException e) {
            // Expected for offsets below begin; an error for anything else.
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
    }
}

Source File: BreakIteratorTest.java From openjdk-jdk8u with GNU General Public License v2.0

6 votes

/**
 * Checks {@code bi.isBoundary(offset)} for every offset from 0 through
 * {@code text.length()} (inclusive) against the expected boundary positions.
 * Each result is logged with {@code logln}; each mismatch is reported with
 * {@code errln}.
 *
 * @param bi         the break iterator under test
 * @param text       the text whose length bounds the offsets that are probed
 * @param boundaries expected boundary offsets in ascending order, consumed
 *                   starting at index 1 (presumably index 0 holds a sentinel
 *                   -- confirm against the caller)
 */
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    logln("testIsBoundary():");
    final int lastOffset = text.length();
    int expectedIdx = 1;    // position in boundaries of the next expected break
    for (int offset = 0; offset <= lastOffset; offset++) {
        final boolean actual = bi.isBoundary(offset);
        logln("bi.isBoundary(" + offset + ") -> " + actual);

        if (offset == boundaries[expectedIdx]) {
            // This offset is an expected boundary: move on to the next one.
            ++expectedIdx;
            if (!actual) {
                errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
            }
        } else if (actual) {
            errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
        }
    }
}

Source File: BreakIteratorTest.java From openjdk-jdk8u with GNU General Public License v2.0

6 votes

/**
 * Regression test (named for bug 4153072): when the iterator's text is a
 * sub-range of a string, {@code isBoundary()} must throw
 * {@link IllegalArgumentException} for offsets before the range's begin index
 * and must not throw for offsets at or after it.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    // Iterate only over the sub-range [begin, end) of str, starting at begin.
    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            // Only whether the call throws matters; the returned value is ignored.
            iter.isBoundary(index);
            if (index < begin) {
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
            }
        } catch (IllegalArgumentException e) {
            // Expected for offsets below begin; an error for anything else.
            if (index >= begin) {
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
            }
        }
    }
}

Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From jdk1.8-source-analysis with Apache License 2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From openjdk-jdk9 with GNU General Public License v2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From Bytecoder with Apache License 2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From openjdk-jdk8u-backup with GNU General Public License v2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From openjdk-8 with GNU General Public License v2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From Java8CN with Apache License 2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: AbstractWordAwareDoubleClickStrategy.java From xtext-eclipse with Eclipse Public License 2.0

5 votes

/**
 * Computes the region of the word at {@code offset}, using the break
 * iterator supplied by {@code createBreakIterator()} over the whole document.
 *
 * @param document the document to search
 * @param offset   the document offset at which to look for a word
 * @return the word region, or {@code null} when the offset is at the end of
 *         its line, when the resulting region would be empty, or when a
 *         {@link BadLocationException} occurs
 */
@Override
protected IRegion findWord(IDocument document, int offset) {
	try {
		IRegion lineInfo = document.getLineInformationOfOffset(offset);
		int lineStart = lineInfo.getOffset();
		int lineEnd = lineStart + lineInfo.getLength();

		// Nothing to select when the offset sits at the very end of the line.
		if (offset == lineEnd) {
			return null;
		}

		BreakIterator words = createBreakIterator();
		CharacterIterator documentChars = new DocumentCharacterIterator(document);
		words.setText(documentChars);

		// Nearest boundaries on either side, falling back to the line limits.
		int wordStart = words.preceding(offset);
		if (wordStart == BreakIterator.DONE) {
			wordStart = lineStart;
		}
		int wordEnd = words.following(offset);
		if (wordEnd == BreakIterator.DONE) {
			wordEnd = lineEnd;
		}

		// The offset lies between two segments: keep only the longer one
		// (the preceding segment wins a tie).
		if (words.isBoundary(offset)) {
			boolean followingIsLonger = wordEnd - offset > offset - wordStart;
			if (followingIsLonger) {
				wordStart = offset;
			} else {
				wordEnd = offset;
			}
		}

		return (wordEnd == wordStart) ? null : new Region(wordStart, wordEnd - wordStart);
	} catch (BadLocationException e) {
		return null;
	}
}

Source File: ConditionalSpecialCasing.java From TencentKona-8 with GNU General Public License v2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From jdk-1.7-annotated with Apache License 2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: ConditionalSpecialCasing.java From openjdk-8-source with GNU General Public License v2.0

5 votes

/**
 * Tests the "Final_Cased" condition for the character C at {@code index}.
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the offset of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none is found
 *         after C before the next word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator words = BreakIterator.getWordInstance(locale);
    words.setText(src);

    // Walk backwards from C, one code point at a time, until a word boundary.
    int back = index;
    while (back >= 0 && !words.isBoundary(back)) {
        int previous = src.codePointBefore(back);
        if (!isCased(previous)) {
            back -= Character.charCount(previous);
            continue;
        }

        // A cased letter precedes C; now verify that none follows C in this word.
        int limit = src.length();
        int forward = index + Character.charCount(src.codePointAt(index));
        while (forward < limit && !words.isBoundary(forward)) {
            int following = src.codePointAt(forward);
            if (isCased(following)) {
                return false;
            }
            forward += Character.charCount(following);
        }
        return true;
    }

    return false;
}

Source File: HighlightsViewUtils.java From netbeans with Apache License 2.0

4 votes

/**
 * Determine the offset at which a paragraph should be broken into a physical line. A break
 * offset of X means that the character with index (X-1) in {@code paragraph} is the last one
 * on the physical line.
 *
 * <p>Policy of the current implementation: lines with leading whitespace are avoided (when
 * words are separated by at most one whitespace character); lines may be broken after
 * hyphens; and, if {@code allowWhitespaceBeyondEnd} is true, one whitespace character may
 * extend beyond the preferred break width so that all available horizontal space is used.
 * Very long unbreakable words may extend beyond the preferred break offset regardless of
 * {@code allowWhitespaceBeyondEnd}.
 *
 * <p>Design note: letting an arbitrary number of whitespace characters trail off the end of
 * each wrap line was considered. Limiting this to a single character turned out better, since
 * it avoids ever having to position the visual text caret outside the word-wrapped editor
 * viewport (except in cases of very long unbreakable words).
 *
 * @param paragraph a long line of text to be broken, i.e. a paragraph, or the remainder of a
 *        paragraph if some of its initial lines of wrapped text have already been laid out
 * @param preferredMaximumBreakOffset the preferred maximum break offset
 * @param allowWhitespaceBeyondEnd if true, allow one whitespace character to extend beyond
 *        {@code preferredMaximumBreakOffset} even when this could be avoided by choosing a
 *        smaller break offset
 * @return the chosen break offset
 * @throws IllegalArgumentException if {@code preferredMaximumBreakOffset} is negative or
 *         greater than {@code paragraph.length()}
 */
static int adjustBreakOffsetToWord(CharSequence paragraph,
        final int preferredMaximumBreakOffset, boolean allowWhitespaceBeyondEnd)
{
    if (preferredMaximumBreakOffset < 0
        || preferredMaximumBreakOffset > paragraph.length())
    {
        throw new IllegalArgumentException();
    }
    /* BreakIterator.getLineInstance already seems to have a cache, so a new instance costs
    only a BreakIterator.clone(); caching the iterator here is not worthwhile. */
    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.US);
    /* Wrap the paragraph in a CharSequenceCharacterIterator so the paragraph string is not
    copied on every call. */
    lineBreaks.setText(new CharSequenceCharacterIterator(paragraph));

    // Phase 1: look for a break at or before the preferred offset.
    int backward = 0;
    if (preferredMaximumBreakOffset != 0) {
        if (allowWhitespaceBeyondEnd
            && preferredMaximumBreakOffset < paragraph.length()
            && Character.isWhitespace(paragraph.charAt(preferredMaximumBreakOffset)))
        {
            // One whitespace character is allowed to extend beyond the preferred break offset.
            return preferredMaximumBreakOffset + 1;
        }
        if (lineBreaks.isBoundary(preferredMaximumBreakOffset)) {
            backward = preferredMaximumBreakOffset;
        } else {
            backward = lineBreaks.preceding(preferredMaximumBreakOffset);
        }
        if (backward == BreakIterator.DONE) {
            return preferredMaximumBreakOffset;
        }
    }
    if (backward != 0) {
        return backward;
    }

    // Phase 2: no usable earlier break (e.g. a word longer than the preferred break offset),
    // so move forward to the next boundary instead.
    int forward;
    if (preferredMaximumBreakOffset > 0 && lineBreaks.isBoundary(preferredMaximumBreakOffset)) {
        forward = preferredMaximumBreakOffset;
    } else {
        forward = lineBreaks.following(preferredMaximumBreakOffset);
    }
    if (forward == BreakIterator.DONE) {
        forward = preferredMaximumBreakOffset;
    }

    /* The line break iterator counts whitespace trailing a word as part of the segment. Strip
    it so that the whitespace policy of this method is applied instead. */
    int trimmed = forward;
    while (trimmed > preferredMaximumBreakOffset
        && Character.isWhitespace(paragraph.charAt(trimmed - 1)))
    {
        trimmed--;
    }
    /* Put back at most one of the stripped whitespace characters, either because the caller
    allows it or because the break offset would otherwise be zero. */
    boolean strippedWhitespace = forward > trimmed;
    if (strippedWhitespace && (allowWhitespaceBeyondEnd || trimmed == 0)) {
        trimmed++;
    }
    return trimmed;
}

Source File: CodeArea.java From RichTextFX with BSD 2-Clause "Simplified" License

4 votes

/**
 * Selects the word around the caret within the caret's paragraph, extending
 * the range found by the word {@link BreakIterator} across adjacent
 * underscores so that identifiers such as {@code foo_bar} are selected as a
 * whole. Does nothing when the area is empty.
 */
@Override // to select words containing underscores
public void selectWord()
{
    if ( getLength() == 0 ) return;

    CaretSelectionBind<?,?,?> csb = getCaretSelectionBind();
    int paragraph = csb.getParagraphIndex();
    int position = csb.getColumnPosition(); 
    
    String paragraphText = getText( paragraph );
    BreakIterator breakIterator = BreakIterator.getWordInstance( getLocale() );
    breakIterator.setText( paragraphText );

    breakIterator.preceding( position );
    int start = breakIterator.current();
    
    // Extend the start leftwards over underscores and the word parts before them.
    while ( start > 0 && paragraphText.charAt( start-1 ) == '_' )
    {
        if ( --start > 0 && ! breakIterator.isBoundary( start-1 ) )
        {
            breakIterator.preceding( start );
            start = breakIterator.current();
        }
    }
    
    breakIterator.following( position );
    int end = breakIterator.current();
    int len = paragraphText.length();
    
    // Extend the end rightwards over underscores and the word parts after them.
    while ( end < len && paragraphText.charAt( end ) == '_' )
    {
        if ( ++end < len && ! breakIterator.isBoundary( end+1 ) )
        {
            breakIterator.following( end );
            end = breakIterator.current();
        }
        // For some reason single digits aren't picked up so ....
        // (end may equal len here when the underscore was the last character,
        // so it must be range-checked before charAt is called)
        else if ( end < len && Character.isDigit( paragraphText.charAt( end ) ) )
        {
            end++;
        }
    }
    
    csb.selectRange( paragraph, start, paragraph, end );
}

Java Code Examples for java.text.BreakIterator#isBoundary()