com.ibm.icu.text.BreakIterator#DONE

Source File: SimpleFilteredSentenceBreakIterator.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Advances past suppressed breaks in the forward direction.
 * The delegate has already produced its first candidate, which is passed in
 * as {@code n}; further candidates are pulled from the delegate for as long
 * as the current one is a break exception.
 * @param n initial position from delegate
 * @return new break position or BreakIterator.DONE
 */
private final int internalNext(int n) {
    // Nothing to filter when no backwards table is loaded, or when the delegate is exhausted.
    final boolean noSuppressions = (backwardsTrie == null);
    if (noSuppressions || n == BreakIterator.DONE) {
        return n;
    }
    resetState();

    final int endOfText = text.getLength();
    int candidate = n;
    // One iteration per underlying break that turns out to be an exception.
    while (candidate != BreakIterator.DONE
            && candidate != endOfText
            && breakExceptionAt(candidate)) {
        candidate = delegate.next();
    }
    // Either a non-suppressed break, the underlying DONE, or the break at end of text.
    return candidate;
}

Source File: SimpleFilteredSentenceBreakIterator.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Steps back past suppressed breaks in the backward direction.
 * The delegate has already produced its first candidate, which is passed in
 * as {@code n}; earlier candidates are pulled from the delegate for as long
 * as the current one is a break exception.
 * @param n initial position from delegate
 * @return new break position or BreakIterator.DONE
 */
private final int internalPrev(int n) {
    // Nothing to filter at the start of text, when the delegate is exhausted,
    // or when no backwards table is loaded.
    final boolean noSuppressions = (backwardsTrie == null);
    if (noSuppressions || n == 0 || n == BreakIterator.DONE) {
        return n;
    }
    resetState();

    int candidate = n;
    // One iteration per underlying break that turns out to be an exception.
    while (candidate != BreakIterator.DONE
            && candidate != 0
            && breakExceptionAt(candidate)) {
        candidate = delegate.previous();
    }
    // Either a non-suppressed break, the underlying DONE, or the break at the start of text.
    return candidate;
}

Source File: BreakIteratorWrapper.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Derives a rule status for the span between two break positions by looking
 * for the first digit or letter code point inside it.
 *
 * @param current break position where the span begins, relative to {@code start}
 * @param next break position where the span ends, relative to {@code start}
 * @return {@code WORD_NUMBER} if a digit is found first, {@code WORD_LETTER} if a
 *         letter is found first, otherwise {@code WORD_NONE} (also when either
 *         position is {@code BreakIterator.DONE})
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    // Advance by the UTF-16 length of each code point so surrogate pairs are read whole.
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor; reading at 'begin' would re-test the first
        // code point on every pass and never look at the rest of the span.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}

Source File: IcuTokenizer.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Moves the breaker to the next span with a non-zero rule status and publishes
 * that span through the term, offset, type and script attributes.
 *
 * @return {@code true} if a token was published, {@code false} if the breaker
 *         reached {@code BreakIterator.DONE} first
 * @throws IllegalStateException if the breaker's current position is already DONE
 */
private boolean incrementTokenBuffer() {
    int tokenStart = breaker.current();
    if (tokenStart == BreakIterator.DONE) {
        throw new IllegalStateException();
    }

    // Walk boundary to boundary; a rule status of 0 marks a non-token span to skip.
    int tokenEnd;
    for (tokenEnd = breaker.next();
            tokenEnd != BreakIterator.DONE && breaker.getRuleStatus() == 0;
            tokenEnd = breaker.next()) {
        tokenStart = tokenEnd;
    }
    if (tokenEnd == BreakIterator.DONE) {
        return false;
    }

    final int tokenLength = tokenEnd - tokenStart;
    termAtt.copyBuffer(buffer, tokenStart, tokenLength);

    final int correctedStart = correctOffset(offset + tokenStart);
    final int correctedEnd = correctOffset(offset + tokenEnd);
    offsetAtt.setOffset(correctedStart, correctedEnd);

    typeAtt.setType(config.getType(breaker.getScriptCode(), breaker.getRuleStatus()));
    scriptAtt.setCode(breaker.getScriptCode());
    return true;
}

Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

6 votes

/**
 * Scans forward from <code>begin</code> until the stop character is found and
 * repositions the iterator state there. When <code>stop</code> is
 * <code>WHITE_SPACE_TOKEN</code>, any whitespace character also ends the scan.
 * If the content is exhausted first, the successor is set to
 * <code>BreakIterator.DONE</code>.
 *
 * @param begin the begin index
 * @param stop the stop character
 */
protected final void skipTokens(final int begin, final int stop) {
	final boolean whitespaceStops= (stop == WHITE_SPACE_TOKEN);
	final int length= fContent.length();

	int index= begin;
	for (; index < length; index++) {
		final char current= fContent.charAt(index);
		if (current == stop)
			break;
		if (whitespaceStops && Character.isWhitespace(current))
			break;
	}

	if (index >= length) {
		// Ran off the end without meeting the stop character.
		fSuccessor= BreakIterator.DONE;
		return;
	}

	fNext= index;
	fPredecessor= index;
	fSuccessor= fWordIterator.following(index);
}

Source File: ICUWordRecognizer.java From birt with Eclipse Public License 1.0

5 votes

/**
 * Advances the word breaker by one boundary and returns the text between the
 * previous and the new boundary.
 *
 * @return the next word, or <code>null</code> when the breaker reports
 *         <code>BreakIterator.DONE</code>
 */
public Word getNextWord( )
{
	final int wordStart = wordBreaker.current( );
	end = wordBreaker.next( );
	return ( end == BreakIterator.DONE ) ? null : new Word( text, wordStart, end );
}

Source File: WordRecognizerWrapper.java From birt with Eclipse Public License 1.0

5 votes

/**
 * Moves the tracked span forward by one boundary: the old end becomes the new
 * start, and the break iterator supplies the new end.
 *
 * @return the word covering the new span, or <code>null</code> when the break
 *         iterator reports <code>BreakIterator.DONE</code>
 */
public Word getNextWord( )
{
	start = end;
	end = breakIterator.next( );
	if ( end == BreakIterator.DONE )
	{
		return null;
	}
	return new Word( text, start, end );
}

Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

5 votes

/**
 * Creates a new spell check iterator.
 * <p>
 * The region's text is read from the document; if reading fails, or the text
 * starts with the NLS tag prefix, the iterator works on an empty string. The
 * word iterator is primed on that text, and every boundary reported by a
 * locale-specific sentence iterator is recorded in <code>fSentenceBreaks</code>.
 *
 * @param document the document containing the specified partition
 * @param region the region to spell check
 * @param locale the locale to use for spell checking
 * @param breakIterator the break-iterator
 */
public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
	fOffset= region.getOffset();
	fWordIterator= breakIterator;
	fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

	String content;
	try {

		content= document.get(region.getOffset(), region.getLength());
		if (content.startsWith(NLSElement.TAG_PREFIX))
			content= ""; //$NON-NLS-1$

	} catch (Exception exception) {
		// Deliberate best effort: any failure to read the region yields nothing to check.
		content= ""; //$NON-NLS-1$
	}
	fContent= content;

	fWordIterator.setText(content);
	fPredecessor= fWordIterator.first();
	fSuccessor= fWordIterator.next();

	final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
	iterator.setText(content);

	// Record each sentence boundary, starting with the iterator's initial position.
	int offset= iterator.current();
	while (offset != BreakIterator.DONE) {

		// Integer.valueOf replaces the deprecated boxing constructor.
		fSentenceBreaks.add(Integer.valueOf(offset));
		offset= iterator.next();
	}
}

Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

5 votes

/**
 * Finds the previous position before the given position.
 * <p>
 * Iterator boundaries are walked backwards until one maps to a widget offset
 * other than -1. If a linked mode model exists for the document, the result is
 * moved forward to a linked position boundary lying between it and
 * <code>position</code>.
 *
 * @param position the current position
 * @return the previous position
 */
protected int findPreviousPosition(int position) {
	final ISourceViewer sourceViewer= getSourceViewer();

	// XXX: optimize
	int candidate= position;
	while (candidate != BreakIterator.DONE) {
		candidate= fIterator.preceding(candidate);
		if (candidate == BreakIterator.DONE)
			break;
		if (modelOffset2WidgetOffset(sourceViewer, candidate) != -1)
			break;
	}

	final IDocument doc= sourceViewer.getDocument();
	final LinkedModeModel linkedModel= LinkedModeModel.getModel(doc, position);
	if (linkedModel == null || candidate == BreakIterator.DONE)
		return candidate;

	// Prefer the start of the linked position that contains the caret.
	final LinkedPosition atCaret= linkedModel.findPosition(new LinkedPosition(doc, position, 0));
	if (atCaret != null) {
		final int linkedStart= atCaret.getOffset();
		if (position != linkedStart && candidate < linkedStart)
			return linkedStart;
		return candidate;
	}

	// Otherwise stop at the end of a linked position that contains the candidate.
	final LinkedPosition atCandidate= linkedModel.findPosition(new LinkedPosition(doc, candidate, 0));
	if (atCandidate != null) {
		final int linkedEnd= atCandidate.getOffset() + atCandidate.getLength();
		if (position != linkedEnd && candidate < linkedEnd)
			return linkedEnd;
	}
	return candidate;
}

Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

5 votes

/**
 * Moves the caret to the previous sub-word position, falling back to the
 * inherited action when sub-word navigation is disabled or when block
 * selection mode would move the caret to a different line.
 */
@Override
public void run() {
	// Sub-word navigation is opt-in; otherwise defer to the inherited behavior.
	if (!getPreferenceStore().getBoolean(PreferenceConstants.EDITOR_SUB_WORD_NAVIGATION)) {
		super.run();
		return;
	}

	final ISourceViewer sourceViewer= getSourceViewer();
	final IDocument doc= sourceViewer.getDocument();
	try {
		fIterator.setText((CharacterIterator)new DocumentCharacterIterator(doc));
		final int caret= widgetOffset2ModelOffset(sourceViewer, sourceViewer.getTextWidget().getCaretOffset());
		if (caret == -1)
			return;

		final int target= findPreviousPosition(caret);
		final boolean leavesLineInBlockMode= isBlockSelectionModeEnabled()
				&& doc.getLineOfOffset(target) != doc.getLineOfOffset(caret);
		if (leavesLineInBlockMode) {
			super.run(); // may navigate into virtual white space
			return;
		}
		if (target == BreakIterator.DONE)
			return;

		setCaretPosition(target);
		getTextWidget().showSelection();
		fireSelectionChanged();
	} catch (BadLocationException ignored) {
		// ignore - getLineOfOffset failed
	}

}

Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

5 votes

/**
 * Finds the next position after the given position.
 * <p>
 * Iterator boundaries are walked forwards until one maps to a widget offset
 * other than -1. If a linked mode model exists for the document, the result is
 * pulled back to a linked position boundary lying between
 * <code>position</code> and it.
 *
 * @param position the current position
 * @return the next position
 */
protected int findNextPosition(int position) {
	final ISourceViewer sourceViewer= getSourceViewer();

	// XXX: optimize
	int candidate= position;
	while (candidate != BreakIterator.DONE) {
		candidate= fIterator.following(candidate);
		if (candidate == BreakIterator.DONE)
			break;
		if (modelOffset2WidgetOffset(sourceViewer, candidate) != -1)
			break;
	}

	final IDocument doc= sourceViewer.getDocument();
	final LinkedModeModel linkedModel= LinkedModeModel.getModel(doc, position);
	if (linkedModel == null || candidate == BreakIterator.DONE)
		return candidate;

	// Prefer the end of the linked position that contains the caret.
	final LinkedPosition atCaret= linkedModel.findPosition(new LinkedPosition(doc, position, 0));
	if (atCaret != null) {
		final int linkedEnd= atCaret.getOffset() + atCaret.getLength();
		if (position != linkedEnd && linkedEnd < candidate)
			return linkedEnd;
		return candidate;
	}

	// Otherwise stop at the start of a linked position that contains the candidate.
	final LinkedPosition atCandidate= linkedModel.findPosition(new LinkedPosition(doc, candidate, 0));
	if (atCandidate != null) {
		final int linkedStart= atCandidate.getOffset();
		if (position != linkedStart && linkedStart < candidate)
			return linkedStart;
	}
	return candidate;
}

Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

5 votes

/**
 * Moves the caret to the next sub-word position, falling back to the
 * inherited action when sub-word navigation is disabled or when block
 * selection mode would move the caret to a different line.
 */
@Override
public void run() {
	// Sub-word navigation is opt-in; otherwise defer to the inherited behavior.
	if (!getPreferenceStore().getBoolean(PreferenceConstants.EDITOR_SUB_WORD_NAVIGATION)) {
		super.run();
		return;
	}

	final ISourceViewer sourceViewer= getSourceViewer();
	final IDocument doc= sourceViewer.getDocument();
	try {
		fIterator.setText((CharacterIterator)new DocumentCharacterIterator(doc));
		final int caret= widgetOffset2ModelOffset(sourceViewer, sourceViewer.getTextWidget().getCaretOffset());
		if (caret == -1)
			return;

		final int target= findNextPosition(caret);
		final boolean leavesLineInBlockMode= isBlockSelectionModeEnabled()
				&& doc.getLineOfOffset(target) != doc.getLineOfOffset(caret);
		if (leavesLineInBlockMode) {
			super.run(); // may navigate into virtual white space
			return;
		}
		if (target == BreakIterator.DONE)
			return;

		setCaretPosition(target);
		getTextWidget().showSelection();
		fireSelectionChanged();
	} catch (BadLocationException ignored) {
		// ignore
	}
}

Source File: RenamingNameSuggestor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

5 votes

/**
 * Splits the type name at the boundaries reported by a
 * {@link JavaWordIterator} and returns the pieces in order, for example:
 *
 * "JavaElementName" => { "Java", "Element", "Name" }
 *
 * "ASTNode" => { "AST", "Node" }
 *
 */
private String[] getSuffixes(String typeName) {
	final JavaWordIterator wordIterator= new JavaWordIterator();
	wordIterator.setText(typeName);

	final List<String> parts= new ArrayList<String>();
	int segmentStart= 0;
	// Each boundary closes the segment that began at the previous one.
	for (int boundary= wordIterator.next(); boundary != BreakIterator.DONE; boundary= wordIterator.next()) {
		parts.add(typeName.substring(segmentStart, boundary));
		segmentStart= boundary;
	}
	return parts.toArray(new String[0]);
}

Source File: BreakIteratorWrapper.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns current rule status for the text between breaks. (determines token type)
 * Spans recognized by {@code isEmoji} get the emoji-sequence status; everything
 * else gets the status reported by the underlying rule-based iterator.
 */
private int calcStatus(int current, int next) {
  // to support presentation selectors, we need to handle alphanum, num, and none at least, so currently not worth optimizing.
  // https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AEmoji%3A%5D-%5B%3AEmoji_Presentation%3A%5D&g=Word_Break&i=
  final boolean emojiSequence = next != BreakIterator.DONE && isEmoji(current, next);
  return emojiSequence ? ICUTokenizerConfig.EMOJI_SEQUENCE_STATUS : rbbi.getRuleStatus();
}

Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

4 votes

/**
 * Tells whether iteration can continue.
 *
 * @return <code>false</code> once the successor is <code>BreakIterator.DONE</code>,
 *         <code>true</code> otherwise
 */
public final boolean hasNext() {
	final boolean exhausted= (fSuccessor == BreakIterator.DONE);
	return !exhausted;
}

Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

4 votes

/**
 * Determines the next token to be spell checked.
 * <p>
 * The span between <code>fPrevious</code> and <code>fNext</code> is classified
 * by its first character (Javadoc tag prefix, HTML tag prefix, HTML entity
 * start) or as a plain alphanumeric word, and the iterator is advanced over
 * the whole construct. After a span has been handled, sentence breaks at or
 * before it are dropped and <code>fStartsSentence</code> is updated.
 *
 * @return the next token to be spell checked, or <code>null</code>
 *         iff the next token is not a candidate for spell checking.
 */
protected String nextToken() {

	String token= null;

	// The candidate span starts where the previous one ended.
	fPrevious= fPredecessor;
	fStartsSentence= false;

	// NOTE(review): nextBreak() is defined elsewhere; presumably it advances
	// fNext / fPredecessor / fSuccessor to the next word boundary - confirm.
	nextBreak();

	// Set when a span was handled, so the sentence bookkeeping below runs.
	boolean update= false;
	if (fNext - fPrevious > 0) {

		if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IJavaDocTagConstants.JAVADOC_TAG_PREFIX) {

			// Javadoc tag: take in the following span; the prefix plus that span
			// becomes the token only when a letter directly follows the prefix.
			nextBreak();
			if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
				update= true;
				token= fContent.substring(fPrevious, fNext);
			} else
				fPredecessor= fNext;

		} else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {

			// HTML tag: a closing tag needs one extra step before the tag name.
			if (fContent.startsWith(IHtmlTagConstants.HTML_CLOSE_PREFIX, fPrevious))
				nextBreak();

			nextBreak();

			// The whole tag becomes the token only if the postfix character follows
			// the name and the content does not end right after it.
			if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_TAG_POSTFIX) {

				nextBreak();
				if (fSuccessor != BreakIterator.DONE) {
					update= true;
					token= fContent.substring(fPrevious, fNext);
				}
			}
		} else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_ENTITY_START && (Character.isLetter(fContent.charAt(fNext)))) {
			// HTML entity: entities listed in HTML_ENTITY_CODES are skipped up to the
			// entity end character; anything else is returned as a token.
			nextBreak();
			if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_ENTITY_END) {
				nextBreak();
				if (isToken(fContent.substring(fPrevious, fNext), IHtmlTagConstants.HTML_ENTITY_CODES)) {
					skipTokens(fPrevious, IHtmlTagConstants.HTML_ENTITY_END);
					update= true;
				} else
					token= fContent.substring(fPrevious, fNext);
			} else
				token= fContent.substring(fPrevious, fNext);

			update= true;
		} else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {

			// Plain word. URLs are skipped up to the next whitespace; Javadoc param
			// tags and reference tags clear fLastToken (reference tags also skip to
			// the first character of the line delimiter); otherwise the span is a
			// token if it is longer than one character, or a single letter while
			// single letters are not being ignored.
			if (isUrlToken(fPrevious))
				skipTokens(fPrevious, WHITE_SPACE_TOKEN);
			else if (isToken(IJavaDocTagConstants.JAVADOC_PARAM_TAGS))
				fLastToken= null;
			else if (isToken(IJavaDocTagConstants.JAVADOC_REFERENCE_TAGS)) {
				fLastToken= null;
				skipTokens(fPrevious, fDelimiter.charAt(0));
			} else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious) && !fIsIgnoringSingleLetters)
				token= fContent.substring(fPrevious, fNext);

			update= true;
		}
	}

	if (update && fSentenceBreaks.size() > 0) {

		// NOTE(review): nextSentence() is defined elsewhere; presumably it returns
		// the first pending sentence break offset - confirm.
		if (fPrevious >= nextSentence()) {

			// Drop every sentence break the span has reached or passed.
			while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
				fSentenceBreaks.removeFirst();

			fStartsSentence= (fLastToken == null) || (token != null);
		}
	}
	return token;
}

Source File: WordRecognizerWrapper.java From birt with Eclipse Public License 1.0

4 votes

/**
 * Tells whether another word may follow the current end position.
 *
 * @return <code>true</code> when the end position is neither
 *         <code>BreakIterator.DONE</code> nor at or past the end of the text
 */
public boolean hasWord( )
{
	if ( end == BreakIterator.DONE )
	{
		return false;
	}
	return end < text.length( );
}

Source File: SimpleFilteredSentenceBreakIterator.java From fitnotifications with Apache License 2.0

4 votes

/**
 * Is there an exception at this point?
 * <p>
 * The text before {@code n} is fed backwards, code point by code point, into
 * the backwards trie. A full match suppresses the break. A partial match
 * suppresses it only if the text from the matched position onward also matches
 * the forwards trie.
 *
 * @param n candidate break position in the text
 * @return {@code true} if the break at {@code n} is an exception and should be
 *         suppressed, {@code false} otherwise
 */
private final boolean breakExceptionAt(int n) {
    // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

    int bestPosn = -1;
    int bestValue = -1;

    // loops while 'n' points to an exception
    text.setIndex(n);
    backwardsTrie.reset();
    int uch;

    // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
    if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here??
        // TODO only do this the 1st time?
    } else {
        // Not a space: step forward again so the backward scan starts with this code point.
        uch = text.nextCodePoint();
    }

    BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;

    while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and..
            ((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie
        if (r.hasValue()) { // remember the best match so far
            bestPosn = text.getIndex();
            bestValue = backwardsTrie.getValue();
        }
    }

    if (r.matches()) { // exact match?
        bestValue = backwardsTrie.getValue();
        bestPosn = text.getIndex();
    }

    if (bestPosn >= 0) {
        if (bestValue == Builder.MATCH) { // exact match!
            return true; // Exception here.
        } else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) {
            // make sure there's a forward trie
            // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
            // to see if it matches something going forward.
            forwardsPartialTrie.reset();

            BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE;
            text.setIndex(bestPosn); // hope that's close ..
            // The end-of-text sentinel is the character iterator's own DONE, as in
            // the backward scan above.
            while ((uch = text.nextCodePoint()) != UCharacterIterator.DONE
                    && ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) {
            }
            if (rfwd.matches()) {
                // Exception here
                return true;
            } // else fall through
        } // else fall through
    } // else fall through
    return false; // No exception here.
}

Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0

3 votes

/**
 * Returns the next token to be spell checked, skipping spans that yield no
 * token, and remembers it as the last token.
 *
 * @return the next token, or <code>null</code> if the content is exhausted
 *         before another token is found
 */
public String next() {
	String token;
	do {
		token= nextToken();
	} while (token == null && fSuccessor != BreakIterator.DONE);

	fLastToken= token;
	return token;
}

Java Code Examples for com.ibm.icu.text.BreakIterator#DONE