java.text.CharacterIterator#getIndex

Source File: SearchIterator.java From j2objc with Apache License 2.0

6 votes

/**
 * Set the target text to be searched. Text iteration will then begin at 
 * the start of the text string. This method is useful if you want to 
 * reuse an iterator to search within a different body of text.
 * 
 * @param text new text iterator to look for match, 
 * @exception IllegalArgumentException thrown when text is null or has
 *               0 length
 * @see #getTarget
 */
public void setTarget(CharacterIterator text)
{
    if (text == null || text.getEndIndex() == text.getIndex()) {
        throw new IllegalArgumentException("Illegal null or empty text");
    }

    text.setIndex(text.getBeginIndex());
    search_.setTarget(text);
    search_.matchedIndex_ = DONE;
    search_.setMatchedLength(0);
    search_.reset_ = true;
    search_.isForwardSearching_ = true;
    if (search_.breakIter() != null) {
        // Create a clone of CharacterItearator, so it won't
        // affect the position currently held by search_.text()
        search_.breakIter().setText((CharacterIterator)text.clone());
    }
    if (search_.internalBreakIter_ != null) {
        search_.internalBreakIter_.setText((CharacterIterator)text.clone());
    }
}

Source File: RuleBasedBreakIterator.java From Bytecoder with Apache License 2.0

5 votes

/**
 * Set the iterator to analyze a new piece of text.  This function resets
 * the current iteration position to the beginning of the text.
 * @param newText An iterator over the text to analyze.
 */
@Override
public void setText(CharacterIterator newText) {
    // Test iterator to see if we need to wrap it in a SafeCharIterator.
    // The correct behavior for CharacterIterators is to allow the
    // position to be set to the endpoint of the iterator.  Many
    // CharacterIterators do not uphold this, so this is a workaround
    // to permit them to use this class.
    int end = newText.getEndIndex();
    boolean goodIterator;
    try {
        newText.setIndex(end);  // some buggy iterators throw an exception here
        goodIterator = newText.getIndex() == end;
    }
    catch(IllegalArgumentException e) {
        goodIterator = false;
    }

    if (goodIterator) {
        text = newText;
    }
    else {
        text = new SafeCharIterator(newText);
    }
    text.first();

    cachedLastKnownBreak = BreakIterator.DONE;
}

Source File: DictionaryBasedBreakIterator.java From jdk8u_jdk with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the first boundary position after
 * the specified position.
 * @param offset The position to begin searching forward from
 * @return The position of the first boundary after "offset"
 */
@Override
public int following(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);

    // if we have no cached break positions, or if "offset" is outside the
    // range covered by the cache, then dump the cache and call our
    // inherited following() method.  This will call other methods in this
    // class that may refresh the cache.
    if (cachedBreakPositions == null || offset < cachedBreakPositions[0] ||
            offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) {
        cachedBreakPositions = null;
        return super.following(offset);
    }

    // on the other hand, if "offset" is within the range covered by the
    // cache, then just search the cache for the first break position
    // after "offset"
    else {
        positionInCache = 0;
        while (positionInCache < cachedBreakPositions.length
               && offset >= cachedBreakPositions[positionInCache]) {
            ++positionInCache;
        }
        text.setIndex(cachedBreakPositions[positionInCache]);
        return text.getIndex();
    }
}

Source File: RuleBasedBreakIterator.java From jdk8u_jdk with GNU General Public License v2.0

5 votes

/**
 * Set the iterator to analyze a new piece of text.  This function resets
 * the current iteration position to the beginning of the text.
 * @param newText An iterator over the text to analyze.
 */
@Override
public void setText(CharacterIterator newText) {
    // Test iterator to see if we need to wrap it in a SafeCharIterator.
    // The correct behavior for CharacterIterators is to allow the
    // position to be set to the endpoint of the iterator.  Many
    // CharacterIterators do not uphold this, so this is a workaround
    // to permit them to use this class.
    int end = newText.getEndIndex();
    boolean goodIterator;
    try {
        newText.setIndex(end);  // some buggy iterators throw an exception here
        goodIterator = newText.getIndex() == end;
    }
    catch(IllegalArgumentException e) {
        goodIterator = false;
    }

    if (goodIterator) {
        text = newText;
    }
    else {
        text = new SafeCharIterator(newText);
    }
    text.first();

    cachedLastKnownBreak = BreakIterator.DONE;
}

Source File: DictionaryBasedBreakIterator.java From openjdk-8 with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the first boundary position after
 * the specified position.
 * @param offset The position to begin searching forward from
 * @return The position of the first boundary after "offset"
 */
@Override
public int following(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);

    // if we have no cached break positions, or if "offset" is outside the
    // range covered by the cache, then dump the cache and call our
    // inherited following() method.  This will call other methods in this
    // class that may refresh the cache.
    if (cachedBreakPositions == null || offset < cachedBreakPositions[0] ||
            offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) {
        cachedBreakPositions = null;
        return super.following(offset);
    }

    // on the other hand, if "offset" is within the range covered by the
    // cache, then just search the cache for the first break position
    // after "offset"
    else {
        positionInCache = 0;
        while (positionInCache < cachedBreakPositions.length
               && offset >= cachedBreakPositions[positionInCache]) {
            ++positionInCache;
        }
        text.setIndex(cachedBreakPositions[positionInCache]);
        return text.getIndex();
    }
}

Source File: DictionaryBasedBreakIterator.java From openjdk-jdk9 with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the last boundary position
 * before the specified position.
 * @param offset The position to begin searching from
 * @return The position of the last boundary before "offset"
 */
@Override
public int preceding(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);

    // if we have no cached break positions, or "offset" is outside the
    // range covered by the cache, we can just call the inherited routine
    // (which will eventually call other routines in this class that may
    // refresh the cache)
    if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] ||
            offset > cachedBreakPositions[cachedBreakPositions.length - 1]) {
        cachedBreakPositions = null;
        return super.preceding(offset);
    }

    // on the other hand, if "offset" is within the range covered by the cache,
    // then all we have to do is search the cache for the last break position
    // before "offset"
    else {
        positionInCache = 0;
        while (positionInCache < cachedBreakPositions.length
               && offset > cachedBreakPositions[positionInCache]) {
            ++positionInCache;
        }
        --positionInCache;
        text.setIndex(cachedBreakPositions[positionInCache]);
        return text.getIndex();
    }
}

Source File: DictionaryBasedBreakIterator.java From Bytecoder with Apache License 2.0

5 votes

/**
 * Sets the current iteration position to the first boundary position after
 * the specified position.
 * @param offset The position to begin searching forward from
 * @return The position of the first boundary after "offset"
 */
@Override
public int following(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);

    // if we have no cached break positions, or if "offset" is outside the
    // range covered by the cache, then dump the cache and call our
    // inherited following() method.  This will call other methods in this
    // class that may refresh the cache.
    if (cachedBreakPositions == null || offset < cachedBreakPositions[0] ||
            offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) {
        cachedBreakPositions = null;
        return super.following(offset);
    }

    // on the other hand, if "offset" is within the range covered by the
    // cache, then just search the cache for the first break position
    // after "offset"
    else {
        positionInCache = 0;
        while (positionInCache < cachedBreakPositions.length
               && offset >= cachedBreakPositions[positionInCache]) {
            ++positionInCache;
        }
        text.setIndex(cachedBreakPositions[positionInCache]);
        return text.getIndex();
    }
}

Source File: RuleBasedBreakIterator.java From jdk8u60 with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the end of the text.
 * (i.e., the CharacterIterator's ending offset).
 * @return The text's past-the-end offset.
 */
@Override
public int last() {
    CharacterIterator t = getText();

    // I'm not sure why, but t.last() returns the offset of the last character,
    // rather than the past-the-end offset
    t.setIndex(t.getEndIndex());
    return t.getIndex();
}

Source File: RuleBasedBreakIterator.java From jdk8u_jdk with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the end of the text.
 * (i.e., the CharacterIterator's ending offset).
 * @return The text's past-the-end offset.
 */
@Override
public int last() {
    CharacterIterator t = getText();

    // I'm not sure why, but t.last() returns the offset of the last character,
    // rather than the past-the-end offset
    t.setIndex(t.getEndIndex());
    return t.getIndex();
}

Source File: RuleBasedBreakIterator.java From jdk8u60 with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the beginning of the text.
 * (i.e., the CharacterIterator's starting offset).
 * @return The offset of the beginning of the text.
 */
@Override
public int first() {
    CharacterIterator t = getText();

    t.first();
    return t.getIndex();
}

Source File: RuleBasedBreakIterator.java From dragonwell8_jdk with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the beginning of the text.
 * (i.e., the CharacterIterator's starting offset).
 * @return The offset of the beginning of the text.
 */
@Override
public int first() {
    CharacterIterator t = getText();

    t.first();
    return t.getIndex();
}

Source File: DictionaryBasedBreakIterator.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Sets the current iteration position to the last boundary position
 * before the specified position.
 * @param offset The position to begin searching from
 * @return The position of the last boundary before "offset"
 */
@Override
public int preceding(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);

    // if we have no cached break positions, or "offset" is outside the
    // range covered by the cache, we can just call the inherited routine
    // (which will eventually call other routines in this class that may
    // refresh the cache)
    if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] ||
            offset > cachedBreakPositions[cachedBreakPositions.length - 1]) {
        cachedBreakPositions = null;
        return super.preceding(offset);
    }

    // on the other hand, if "offset" is within the range covered by the cache,
    // then all we have to do is search the cache for the last break position
    // before "offset"
    else {
        positionInCache = 0;
        while (positionInCache < cachedBreakPositions.length
               && offset > cachedBreakPositions[positionInCache]) {
            ++positionInCache;
        }
        --positionInCache;
        text.setIndex(cachedBreakPositions[positionInCache]);
        return text.getIndex();
    }
}

Source File: RuleBasedBreakIterator.java From jdk-1.7-annotated with Apache License 2.0

5 votes

/**
 * Sets the current iteration position to the beginning of the text.
 * (i.e., the CharacterIterator's starting offset).
 * @return The offset of the beginning of the text.
 */
public int first() {
    CharacterIterator t = getText();

    t.first();
    return t.getIndex();
}

Source File: StandardGlyphVector.java From dragonwell8_jdk with GNU General Public License v2.0

5 votes

public StandardGlyphVector(Font font, CharacterIterator iter, FontRenderContext frc) {
    int offset = iter.getBeginIndex();
    char[] text = new char [iter.getEndIndex() - offset];
    for(char c = iter.first();
        c != CharacterIterator.DONE;
        c = iter.next()) {
        text[iter.getIndex() - offset] = c;
    }
    init(font, text, 0, text.length, frc, UNINITIALIZED_FLAGS);
}

Source File: RuleBasedBreakIterator.java From Bytecoder with Apache License 2.0

4 votes

SafeCharIterator(CharacterIterator base) {
    this.base = base;
    this.rangeStart = base.getBeginIndex();
    this.rangeLimit = base.getEndIndex();
    this.currentIndex = base.getIndex();
}

Source File: RuleBasedBreakIterator.java From openjdk-8-source with GNU General Public License v2.0

4 votes

/**
 * This method backs the iterator back up to a "safe position" in the text.
 * This is a position that we know, without any context, must be a break position.
 * The various calling methods then iterate forward from this safe position to
 * the appropriate position to return.  (For more information, see the description
 * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
 */
protected int handlePrevious() {
    CharacterIterator text = getText();
    int state = START_STATE;
    int category = 0;
    int lastCategory = 0;
    int c = getCurrent();

    // loop until we reach the beginning of the text or transition to state 0
    while (c != CharacterIterator.DONE && state != STOP_STATE) {

        // save the last character's category and look up the current
        // character's category
        lastCategory = category;
        category = lookupCategory(c);

        // if the current character isn't an ignore character, look up a
        // state transition in the backwards state table
        if (category != IGNORE) {
            state = lookupBackwardState(state, category);
        }

        // then advance one character backwards
        c = getPrevious();
    }

    // if we didn't march off the beginning of the text, we're either one or two
    // positions away from the real break position.  (One because of the call to
    // previous() at the end of the loop above, and another because the character
    // that takes us into the stop state will always be the character BEFORE
    // the break position.)
    if (c != CharacterIterator.DONE) {
        if (lastCategory != IGNORE) {
            getNext();
            getNext();
        }
        else {
            getNext();
        }
    }
    return text.getIndex();
}

Source File: RuleBasedBreakIterator.java From jdk8u-jdk with GNU General Public License v2.0

4 votes

/**
 * This method is the actual implementation of the next() method.  All iteration
 * vectors through here.  This method initializes the state machine to state 1
 * and advances through the text character by character until we reach the end
 * of the text or the state machine transitions to state 0.  We update our return
 * value every time the state machine passes through a possible end state.
 */
protected int handleNext() {
    // if we're already at the end of the text, return DONE.
    CharacterIterator text = getText();
    if (text.getIndex() == text.getEndIndex()) {
        return BreakIterator.DONE;
    }

    // no matter what, we always advance at least one character forward
    int result = getNextIndex();
    int lookaheadResult = 0;

    // begin in state 1
    int state = START_STATE;
    int category;
    int c = getCurrent();

    // loop until we reach the end of the text or transition to state 0
    while (c != CharacterIterator.DONE && state != STOP_STATE) {

        // look up the current character's character category (which tells us
        // which column in the state table to look at)
        category = lookupCategory(c);

        // if the character isn't an ignore character, look up a state
        // transition in the state table
        if (category != IGNORE) {
            state = lookupState(state, category);
        }

        // if the state we've just transitioned to is a lookahead state,
        // (but not also an end state), save its position.  If it's
        // both a lookahead state and an end state, update the break position
        // to the last saved lookup-state position
        if (lookaheadStates[state]) {
            if (endStates[state]) {
                result = lookaheadResult;
            }
            else {
                lookaheadResult = getNextIndex();
            }
        }

        // otherwise, if the state we've just transitioned to is an accepting
        // state, update the break position to be the current iteration position
        else {
            if (endStates[state]) {
                result = getNextIndex();
            }
        }

        c = getNext();
    }

    // if we've run off the end of the text, and the very last character took us into
    // a lookahead state, advance the break position to the lookahead position
    // (the theory here is that if there are no characters at all after the lookahead
    // position, that always matches the lookahead criteria)
    if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
        result = lookaheadResult;
    }

    text.setIndex(result);
    return result;
}

Source File: DictionaryBasedBreakIterator.java From Bytecoder with Apache License 2.0

4 votes

/**
 * This is the implementation function for next().
 */
@Override
protected int handleNext() {
    CharacterIterator text = getText();

    // if there are no cached break positions, or if we've just moved
    // off the end of the range covered by the cache, we have to dump
    // and possibly regenerate the cache
    if (cachedBreakPositions == null ||
        positionInCache == cachedBreakPositions.length - 1) {

        // start by using the inherited handleNext() to find a tentative return
        // value.   dictionaryCharCount tells us how many dictionary characters
        // we passed over on our way to the tentative return value
        int startPos = text.getIndex();
        dictionaryCharCount = 0;
        int result = super.handleNext();

        // if we passed over more than one dictionary character, then we use
        // divideUpDictionaryRange() to regenerate the cached break positions
        // for the new range
        if (dictionaryCharCount > 1 && result - startPos > 1) {
            divideUpDictionaryRange(startPos, result);
        }

        // otherwise, the value we got back from the inherited fuction
        // is our return value, and we can dump the cache
        else {
            cachedBreakPositions = null;
            return result;
        }
    }

    // if the cache of break positions has been regenerated (or existed all
    // along), then just advance to the next break position in the cache
    // and return it
    if (cachedBreakPositions != null) {
        ++positionInCache;
        text.setIndex(cachedBreakPositions[positionInCache]);
        return cachedBreakPositions[positionInCache];
    }
    return -9999;   // SHOULD NEVER GET HERE!
}

Source File: RuleBasedBreakIterator.java From TencentKona-8 with GNU General Public License v2.0

4 votes

/**
 * This method backs the iterator back up to a "safe position" in the text.
 * This is a position that we know, without any context, must be a break position.
 * The various calling methods then iterate forward from this safe position to
 * the appropriate position to return.  (For more information, see the description
 * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
 */
protected int handlePrevious() {
    CharacterIterator text = getText();
    int state = START_STATE;
    int category = 0;
    int lastCategory = 0;
    int c = getCurrent();

    // loop until we reach the beginning of the text or transition to state 0
    while (c != CharacterIterator.DONE && state != STOP_STATE) {

        // save the last character's category and look up the current
        // character's category
        lastCategory = category;
        category = lookupCategory(c);

        // if the current character isn't an ignore character, look up a
        // state transition in the backwards state table
        if (category != IGNORE) {
            state = lookupBackwardState(state, category);
        }

        // then advance one character backwards
        c = getPrevious();
    }

    // if we didn't march off the beginning of the text, we're either one or two
    // positions away from the real break position.  (One because of the call to
    // previous() at the end of the loop above, and another because the character
    // that takes us into the stop state will always be the character BEFORE
    // the break position.)
    if (c != CharacterIterator.DONE) {
        if (lastCategory != IGNORE) {
            getNext();
            getNext();
        }
        else {
            getNext();
        }
    }
    return text.getIndex();
}

Source File: RuleBasedBreakIterator.java From jdk8u-jdk with GNU General Public License v2.0

4 votes

/**
 * This method is the actual implementation of the next() method.  All iteration
 * vectors through here.  This method initializes the state machine to state 1
 * and advances through the text character by character until we reach the end
 * of the text or the state machine transitions to state 0.  We update our return
 * value every time the state machine passes through a possible end state.
 */
protected int handleNext() {
    // if we're already at the end of the text, return DONE.
    CharacterIterator text = getText();
    if (text.getIndex() == text.getEndIndex()) {
        return BreakIterator.DONE;
    }

    // no matter what, we always advance at least one character forward
    int result = getNextIndex();
    int lookaheadResult = 0;

    // begin in state 1
    int state = START_STATE;
    int category;
    int c = getCurrent();

    // loop until we reach the end of the text or transition to state 0
    while (c != CharacterIterator.DONE && state != STOP_STATE) {

        // look up the current character's character category (which tells us
        // which column in the state table to look at)
        category = lookupCategory(c);

        // if the character isn't an ignore character, look up a state
        // transition in the state table
        if (category != IGNORE) {
            state = lookupState(state, category);
        }

        // if the state we've just transitioned to is a lookahead state,
        // (but not also an end state), save its position.  If it's
        // both a lookahead state and an end state, update the break position
        // to the last saved lookup-state position
        if (lookaheadStates[state]) {
            if (endStates[state]) {
                result = lookaheadResult;
            }
            else {
                lookaheadResult = getNextIndex();
            }
        }

        // otherwise, if the state we've just transitioned to is an accepting
        // state, update the break position to be the current iteration position
        else {
            if (endStates[state]) {
                result = getNextIndex();
            }
        }

        c = getNext();
    }

    // if we've run off the end of the text, and the very last character took us into
    // a lookahead state, advance the break position to the lookahead position
    // (the theory here is that if there are no characters at all after the lookahead
    // position, that always matches the lookahead criteria)
    if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
        result = lookaheadResult;
    }

    text.setIndex(result);
    return result;
}

Java Code Examples for java.text.CharacterIterator#getIndex()