Java Code Examples for com.ibm.icu.text.RuleBasedBreakIterator#WORD_LETTER

The following examples show how to use com.ibm.icu.text.RuleBasedBreakIterator#WORD_LETTER. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: DefaultICUTokenizerConfig.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a break-iterator rule status, together with the token's script code, to a token
 * type label.
 *
 * @param script script code of the token; only consulted to tell hiragana from katakana
 *     and hangul from other letters
 * @param ruleStatus rule status to classify
 * @return the matching type label, or {@code "<OTHER>"} for any status not handled here
 */
@Override
public String getType(int script, int ruleStatus) {
  if (ruleStatus == RuleBasedBreakIterator.WORD_IDEO) {
    return WORD_IDEO;
  }
  if (ruleStatus == RuleBasedBreakIterator.WORD_KANA) {
    // Kana status covers both syllabaries; the script code picks the label.
    if (script == UScript.HIRAGANA) {
      return WORD_HIRAGANA;
    }
    return WORD_KATAKANA;
  }
  if (ruleStatus == RuleBasedBreakIterator.WORD_LETTER) {
    // Hangul gets its own label; every other script falls under the generic letter label.
    if (script == UScript.HANGUL) {
      return WORD_HANGUL;
    }
    return WORD_LETTER;
  }
  if (ruleStatus == RuleBasedBreakIterator.WORD_NUMBER) {
    return WORD_NUMBER;
  }
  if (ruleStatus == EMOJI_SEQUENCE_STATUS) {
    return WORD_EMOJI;
  }
  // Any other (custom) status code.
  return "<OTHER>";
}
 
Example 2
Source File: BreakIteratorWrapper.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Derives a word rule status for the span between two break positions by scanning the
 * code points of that span.
 *
 * <p>The scan stops at the first code point that is a digit or a letter and reports the
 * corresponding status. Positions are offsets relative to the {@code start} field; the
 * characters are read from the {@code text} field (presumably a {@code char[]} buffer
 * holding the text being iterated; confirm against the enclosing class).
 *
 * @param current break position where the span begins, or {@code BreakIterator.DONE}
 * @param next break position where the span ends (exclusive), or {@code BreakIterator.DONE}
 * @return {@code WORD_NUMBER} if the first digit-or-letter code point in the span is a
 *     digit, {@code WORD_LETTER} if it is a letter, and {@code WORD_NONE} if either
 *     position is {@code DONE} or the span contains neither
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    // Advance by the UTF-16 length of each code point so surrogate pairs are read whole.
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor. Reading at 'begin' on every pass would only ever
        // inspect the first code point of the span.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
 
Example 3
Source File: DefaultIcuTokenizerConfig.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Translates a break-iterator rule status into a token type label, using the script
 * code to refine the kana and letter cases.
 *
 * @param script script code of the token
 * @param ruleStatus rule status to classify
 * @return the type label for the status; {@code "<OTHER>"} when the status is not one
 *     of the ideographic, kana, letter, or number statuses
 */
@Override
public String getType(int script, int ruleStatus) {
    final String type;
    switch (ruleStatus) {
        case RuleBasedBreakIterator.WORD_IDEO:
            type = WORD_IDEO;
            break;
        case RuleBasedBreakIterator.WORD_KANA:
            // Hiragana and katakana share one status; the script code separates them.
            if (script == UScript.HIRAGANA) {
                type = WORD_HIRAGANA;
            } else {
                type = WORD_KATAKANA;
            }
            break;
        case RuleBasedBreakIterator.WORD_LETTER:
            // Hangul is labelled separately from all other letter scripts.
            if (script == UScript.HANGUL) {
                type = WORD_HANGUL;
            } else {
                type = WORD_LETTER;
            }
            break;
        case RuleBasedBreakIterator.WORD_NUMBER:
            type = WORD_NUMBER;
            break;
        default:
            // Some other custom status code.
            type = "<OTHER>";
            break;
    }
    return type;
}