Java Code Examples for com.ibm.icu.text.BreakIterator#DONE
The following examples show how to use
com.ibm.icu.text.BreakIterator#DONE.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SimpleFilteredSentenceBreakIterator.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Given that the delegate has already given its "initial" answer, * find the NEXT actual (non-suppressed) break. * @param n initial position from delegate * @return new break position or BreakIterator.DONE */ private final int internalNext(int n) { if (n == BreakIterator.DONE || // at end or backwardsTrie == null) { // .. no backwards table loaded == no exceptions return n; } resetState(); final int textLen = text.getLength(); while (n != BreakIterator.DONE && n != textLen) { // outer loop runs once per underlying break (from fDelegate). // loops while 'n' points to an exception. if (breakExceptionAt(n)) { // n points to a break exception n = delegate.next(); } else { // no exception at this spot return n; } } return n; //hit underlying DONE or break at end of text }
Example 2
Source File: SimpleFilteredSentenceBreakIterator.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Given that the delegate has already given its "initial" answer, * find the PREV actual (non-suppressed) break. * @param n initial position from delegate * @return new break position or BreakIterator.DONE */ private final int internalPrev(int n) { if (n == 0 || n == BreakIterator.DONE || // at end or backwardsTrie == null) { // .. no backwards table loaded == no exceptions return n; } resetState(); while (n != BreakIterator.DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate). // loops while 'n' points to an exception. if (breakExceptionAt(n)) { // n points to a break exception n = delegate.previous(); } else { // no exception at this spot return n; } } return n; //hit underlying DONE or break at end of text }
Example 3
Source File: BreakIteratorWrapper.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Derives a rule status for the text between two break positions by
 * scanning its code points: the first digit found yields WORD_NUMBER, the
 * first letter found yields WORD_LETTER, otherwise WORD_NONE.
 *
 * @param current break position that starts the span (relative to {@code start})
 * @param next break position that ends the span (relative to {@code start})
 * @return one of the RuleBasedBreakIterator WORD_* status constants
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }

    final int begin = start + current;
    final int end = start + next;

    int i = begin;
    while (i < end) {
        // Read the code point at the running index. The previous code always
        // read at 'begin', so only the first code point of the span was ever
        // inspected, however many times the loop iterated.
        final int codepoint = UTF16.charAt(text, 0, end, i);

        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        }
        if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
        // Step by one or two chars so surrogate pairs are not split.
        i += UTF16.getCharCount(codepoint);
    }

    return RuleBasedBreakIterator.WORD_NONE;
}
Example 4
Source File: IcuTokenizer.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
private boolean incrementTokenBuffer() { int start = breaker.current(); if (start == BreakIterator.DONE) { throw new IllegalStateException(); } // find the next set of boundaries, skipping over non-tokens (rule status 0) int end = breaker.next(); while (end != BreakIterator.DONE && breaker.getRuleStatus() == 0) { start = end; end = breaker.next(); } if (end == BreakIterator.DONE) { return false; } termAtt.copyBuffer(buffer, start, end - start); offsetAtt.setOffset(correctOffset(offset + start), correctOffset(offset + end)); typeAtt.setType(config.getType(breaker.getScriptCode(), breaker.getRuleStatus())); scriptAtt.setCode(breaker.getScriptCode()); return true; }
Example 5
Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 6 votes |
/**
 * Scans forward from {@code begin} until the stop character is found and
 * repositions the iterator state there; if the content ends first, the
 * iterator is marked as exhausted.
 *
 * @param begin the begin index
 * @param stop the stop character
 */
protected final void skipTokens(final int begin, final int stop) {
    final boolean stopOnWhitespace = (stop == WHITE_SPACE_TOKEN);

    int index = begin;
    for (; index < fContent.length(); index++) {
        final char current = fContent.charAt(index);
        if (current == stop) {
            break;
        }
        if (stopOnWhitespace && Character.isWhitespace(current)) {
            break;
        }
    }

    if (index >= fContent.length()) {
        // Ran off the end without meeting the stop character.
        fSuccessor = BreakIterator.DONE;
        return;
    }

    fNext = index;
    fPredecessor = fNext;
    fSuccessor = fWordIterator.following(fNext);
}
Example 6
Source File: ICUWordRecognizer.java From birt with Eclipse Public License 1.0 | 5 votes |
/**
 * Advances the word breaker by one boundary and returns the span it just
 * stepped over.
 *
 * @return the word between the previous and the new boundary, or null when
 *         the breaker reports DONE
 */
public Word getNextWord() {
    final int wordStart = wordBreaker.current();
    end = wordBreaker.next();
    return (end == BreakIterator.DONE) ? null : new Word(text, wordStart, end);
}
Example 7
Source File: WordRecognizerWrapper.java From birt with Eclipse Public License 1.0 | 5 votes |
/**
 * Moves the start marker to the previous end, advances the break iterator
 * by one boundary, and returns the span in between.
 *
 * @return the word between the old and the new boundary, or null when the
 *         iterator reports DONE
 */
public Word getNextWord() {
    start = end;
    end = breakIterator.next();
    if (end == BreakIterator.DONE) {
        return null;
    }
    return new Word(text, start, end);
}
Example 8
Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 5 votes |
/**
 * Creates a new spell check iterator.
 *
 * @param document the document containing the specified partition
 * @param region the region to spell check
 * @param locale the locale to use for spell checking
 * @param breakIterator the break-iterator
 */
public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
    fOffset= region.getOffset();
    fWordIterator= breakIterator;
    fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

    String content;
    try {
        content= document.get(region.getOffset(), region.getLength());
        // Regions that start with the NLS tag prefix are treated as empty.
        if (content.startsWith(NLSElement.TAG_PREFIX))
            content= ""; //$NON-NLS-1$
    } catch (Exception exception) {
        // Best effort: if the region cannot be read, iterate over nothing.
        content= ""; //$NON-NLS-1$
    }
    fContent= content;

    fWordIterator.setText(content);
    fPredecessor= fWordIterator.first();
    fSuccessor= fWordIterator.next();

    // Record every sentence boundary of the content up front.
    final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
    iterator.setText(content);

    int offset= iterator.current();
    while (offset != BreakIterator.DONE) {
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        fSentenceBreaks.add(Integer.valueOf(offset));
        offset= iterator.next();
    }
}
Example 9
Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 5 votes |
/** * Finds the previous position before the given position. * * @param position the current position * @return the previous position */ protected int findPreviousPosition(int position) { ISourceViewer viewer= getSourceViewer(); int widget= -1; int previous= position; while (previous != BreakIterator.DONE && widget == -1) { // XXX: optimize previous= fIterator.preceding(previous); if (previous != BreakIterator.DONE) widget= modelOffset2WidgetOffset(viewer, previous); } IDocument document= viewer.getDocument(); LinkedModeModel model= LinkedModeModel.getModel(document, position); if (model != null && previous != BreakIterator.DONE) { LinkedPosition linkedPosition= model.findPosition(new LinkedPosition(document, position, 0)); if (linkedPosition != null) { int linkedPositionOffset= linkedPosition.getOffset(); if (position != linkedPositionOffset && previous < linkedPositionOffset) previous= linkedPositionOffset; } else { LinkedPosition previousLinkedPosition= model.findPosition(new LinkedPosition(document, previous, 0)); if (previousLinkedPosition != null) { int previousLinkedPositionEnd= previousLinkedPosition.getOffset() + previousLinkedPosition.getLength(); if (position != previousLinkedPositionEnd && previous < previousLinkedPositionEnd) previous= previousLinkedPositionEnd; } } } return previous; }
Example 10
Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 5 votes |
@Override public void run() { // Check whether we are in a java code partition and the preference is enabled final IPreferenceStore store= getPreferenceStore(); if (!store.getBoolean(PreferenceConstants.EDITOR_SUB_WORD_NAVIGATION)) { super.run(); return; } final ISourceViewer viewer= getSourceViewer(); final IDocument document= viewer.getDocument(); try { fIterator.setText((CharacterIterator)new DocumentCharacterIterator(document)); int position= widgetOffset2ModelOffset(viewer, viewer.getTextWidget().getCaretOffset()); if (position == -1) return; int previous= findPreviousPosition(position); if (isBlockSelectionModeEnabled() && document.getLineOfOffset(previous) != document.getLineOfOffset(position)) { super.run(); // may navigate into virtual white space } else if (previous != BreakIterator.DONE) { setCaretPosition(previous); getTextWidget().showSelection(); fireSelectionChanged(); } } catch (BadLocationException x) { // ignore - getLineOfOffset failed } }
Example 11
Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 5 votes |
/** * Finds the next position after the given position. * * @param position the current position * @return the next position */ protected int findNextPosition(int position) { ISourceViewer viewer= getSourceViewer(); int widget= -1; int next= position; while (next != BreakIterator.DONE && widget == -1) { // XXX: optimize next= fIterator.following(next); if (next != BreakIterator.DONE) widget= modelOffset2WidgetOffset(viewer, next); } IDocument document= viewer.getDocument(); LinkedModeModel model= LinkedModeModel.getModel(document, position); if (model != null && next != BreakIterator.DONE) { LinkedPosition linkedPosition= model.findPosition(new LinkedPosition(document, position, 0)); if (linkedPosition != null) { int linkedPositionEnd= linkedPosition.getOffset() + linkedPosition.getLength(); if (position != linkedPositionEnd && linkedPositionEnd < next) next= linkedPositionEnd; } else { LinkedPosition nextLinkedPosition= model.findPosition(new LinkedPosition(document, next, 0)); if (nextLinkedPosition != null) { int nextLinkedPositionOffset= nextLinkedPosition.getOffset(); if (position != nextLinkedPositionOffset && nextLinkedPositionOffset < next) next= nextLinkedPositionOffset; } } } return next; }
Example 12
Source File: JavaEditor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 5 votes |
@Override public void run() { // Check whether we are in a java code partition and the preference is enabled final IPreferenceStore store= getPreferenceStore(); if (!store.getBoolean(PreferenceConstants.EDITOR_SUB_WORD_NAVIGATION)) { super.run(); return; } final ISourceViewer viewer= getSourceViewer(); final IDocument document= viewer.getDocument(); try { fIterator.setText((CharacterIterator)new DocumentCharacterIterator(document)); int position= widgetOffset2ModelOffset(viewer, viewer.getTextWidget().getCaretOffset()); if (position == -1) return; int next= findNextPosition(position); if (isBlockSelectionModeEnabled() && document.getLineOfOffset(next) != document.getLineOfOffset(position)) { super.run(); // may navigate into virtual white space } else if (next != BreakIterator.DONE) { setCaretPosition(next); getTextWidget().showSelection(); fireSelectionChanged(); } } catch (BadLocationException x) { // ignore } }
Example 13
Source File: RenamingNameSuggestor.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 5 votes |
/**
 * Splits a type name into the consecutive segments delimited by the
 * boundaries that JavaWordIterator reports, for example:
 *
 * "JavaElementName" => { "Java", "Element", "Name" }
 *
 * "ASTNode" => { "AST", "Node" }
 *
 * @param typeName the type name to split
 * @return the segments in order of appearance
 */
private String[] getSuffixes(String typeName) {
    final JavaWordIterator wordIterator = new JavaWordIterator();
    wordIterator.setText(typeName);

    final List<String> segments = new ArrayList<String>();
    int segmentStart = 0;
    for (int boundary = wordIterator.next();
            boundary != BreakIterator.DONE;
            boundary = wordIterator.next()) {
        segments.add(typeName.substring(segmentStart, boundary));
        segmentStart = boundary;
    }
    return segments.toArray(new String[0]);
}
Example 14
Source File: BreakIteratorWrapper.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Returns current rule status for the text between breaks. (determines token type) */ private int calcStatus(int current, int next) { // to support presentation selectors, we need to handle alphanum, num, and none at least, so currently not worth optimizing. // https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AEmoji%3A%5D-%5B%3AEmoji_Presentation%3A%5D&g=Word_Break&i= if (next != BreakIterator.DONE && isEmoji(current, next)) { return ICUTokenizerConfig.EMOJI_SEQUENCE_STATUS; } else { return rbbi.getRuleStatus(); } }
Example 15
Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 4 votes |
/**
 * Tells whether the iterator has more content to examine.
 *
 * @return false once the successor boundary is BreakIterator.DONE
 */
public final boolean hasNext() {
    final boolean exhausted = (fSuccessor == BreakIterator.DONE);
    return !exhausted;
}
Example 16
Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 4 votes |
/**
 * Determines the next token to be spell checked.
 * <p>
 * Starts at the current predecessor boundary, advances with
 * {@code nextBreak()}, and classifies the span {@code [fPrevious, fNext)} by
 * its first character: Javadoc tag prefix, HTML tag prefix, HTML entity
 * start, or plain alphanumeric text. Afterwards the recorded sentence breaks
 * are consumed to decide whether the token starts a sentence.
 *
 * @return the next token to be spell checked, or <code>null</code>
 *         iff the next token is not a candidate for spell checking.
 */
protected String nextToken() {

    String token= null;

    fPrevious= fPredecessor;
    fStartsSentence= false;

    // NOTE(review): nextBreak() is defined elsewhere; the code below relies on
    // it updating fNext and fSuccessor - confirm against its definition.
    nextBreak();

    // Set whenever a branch below handled the span; gates the sentence
    // bookkeeping at the end of the method.
    boolean update= false;
    if (fNext - fPrevious > 0) {

        // Span starts with the Javadoc tag prefix.
        if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IJavaDocTagConstants.JAVADOC_TAG_PREFIX) {

            nextBreak();
            // Only a prefix that is directly followed by a letter yields a token.
            if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
                update= true;
                token= fContent.substring(fPrevious, fNext);
            } else
                fPredecessor= fNext;

        // Span starts with the HTML tag prefix, followed by a letter or '/'.
        } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {

            // A closing-tag prefix needs one extra break before the usual one.
            if (fContent.startsWith(IHtmlTagConstants.HTML_CLOSE_PREFIX, fPrevious))
                nextBreak();

            nextBreak();

            // Accept the tag only if the tag postfix comes next and there is
            // still content after it.
            if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_TAG_POSTFIX) {

                nextBreak();
                if (fSuccessor != BreakIterator.DONE) {
                    update= true;
                    token= fContent.substring(fPrevious, fNext);
                }
            }
        // Span starts with the HTML entity start character, followed by a letter.
        } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_ENTITY_START && (Character.isLetter(fContent.charAt(fNext)))) {
            nextBreak();
            if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_ENTITY_END) {
                nextBreak();
                // Entities listed in HTML_ENTITY_CODES are skipped and produce
                // no token; anything else is returned as a token.
                if (isToken(fContent.substring(fPrevious, fNext), IHtmlTagConstants.HTML_ENTITY_CODES)) {
                    skipTokens(fPrevious, IHtmlTagConstants.HTML_ENTITY_END);
                    update= true;
                } else
                    token= fContent.substring(fPrevious, fNext);
            } else
                token= fContent.substring(fPrevious, fNext);

            // Not part of the else above: runs for every path through this branch.
            update= true;

        // Plain text: non-whitespace and alphanumeric according to the helpers.
        } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {

            if (isUrlToken(fPrevious))
                skipTokens(fPrevious, WHITE_SPACE_TOKEN);
            else if (isToken(IJavaDocTagConstants.JAVADOC_PARAM_TAGS))
                fLastToken= null;
            else if (isToken(IJavaDocTagConstants.JAVADOC_REFERENCE_TAGS)) {
                fLastToken= null;
                // Skip up to the first character of the line delimiter.
                skipTokens(fPrevious, fDelimiter.charAt(0));
            } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious) && !fIsIgnoringSingleLetters)
                token= fContent.substring(fPrevious, fNext);

            // Not part of the else-if chain above: runs for every path
            // through this branch.
            update= true;
        }
    }

    if (update && fSentenceBreaks.size() > 0) {
        if (fPrevious >= nextSentence()) {
            // Drop every recorded sentence break at or before the span start.
            while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
                fSentenceBreaks.removeFirst();
            fStartsSentence= (fLastToken == null) || (token != null);
        }
    }
    return token;
}
Example 17
Source File: WordRecognizerWrapper.java From birt with Eclipse Public License 1.0 | 4 votes |
/**
 * Tells whether another word may follow the current end position.
 *
 * @return true when the end position is neither BreakIterator.DONE nor at
 *         or beyond the length of the text
 */
public boolean hasWord() {
    if (end == BreakIterator.DONE) {
        return false;
    }
    return end < text.length();
}
Example 18
Source File: SimpleFilteredSentenceBreakIterator.java From fitnotifications with Apache License 2.0 | 4 votes |
/** * Is there an exception at this point? * * @param n * @return */ private final boolean breakExceptionAt(int n) { // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt() int bestPosn = -1; int bestValue = -1; // loops while 'n' points to an exception text.setIndex(n); backwardsTrie.reset(); int uch; // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") if ((uch = text.previousCodePoint()) == ' ') { // TODO: skip a class of chars here?? // TODO only do this the 1st time? } else { uch = text.nextCodePoint(); } BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE; while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and.. ((r = backwardsTrie.nextForCodePoint(uch)).hasNext())) {// more in the trie if (r.hasValue()) { // remember the best match so far bestPosn = text.getIndex(); bestValue = backwardsTrie.getValue(); } } if (r.matches()) { // exact match? bestValue = backwardsTrie.getValue(); bestPosn = text.getIndex(); } if (bestPosn >= 0) { if (bestValue == Builder.MATCH) { // exact match! return true; // Exception here. } else if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) { // make sure there's a forward trie // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie // to see if it matches something going forward. forwardsPartialTrie.reset(); BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE; text.setIndex(bestPosn); // hope that's close .. while ((uch = text.nextCodePoint()) != BreakIterator.DONE && ((rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext())) { } if (rfwd.matches()) { // Exception here return true; } // else fall through } // else fall through } // else fall through return false; // No exception here. }
Example 19
Source File: SpellCheckIterator.java From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 | 3 votes |
/**
 * Returns the next token that is a candidate for spell checking, skipping
 * spans for which {@code nextToken()} yields null while content remains.
 *
 * @return the next token, or null if the content ran out first
 */
public String next() {
    String candidate;
    do {
        candidate = nextToken();
    } while (candidate == null && fSuccessor != BreakIterator.DONE);

    fLastToken = candidate;
    return candidate;
}