Java Code Examples for java.lang.Character.UnicodeBlock#of()

The following examples show how to use java.lang.Character.UnicodeBlock#of(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: KoreanUnitParser.java    From KOMORAN with Apache License 2.0 6 votes vote down vote up
/**
 * Decomposes every precomposed Hangul syllable in {@code str} into its jamo and tags each jamo
 * with its role; every other character is emitted unchanged as {@code UnitType.OTHER}.
 *
 * <p>A syllable's offset from U+AC00 is split as {@code cho * (21 * 28) + jung * 28 + jong}.
 * A {@code jong} index of 0 means "no final consonant", so no JONGSUNG unit is emitted for it.
 *
 * @param str text to decompose; must not be {@code null}
 * @return the (character, unit type) pairs in input order
 */
public List<Pair<Character, UnitType>> parseWithType(String str) {
    List<Pair<Character, UnitType>> result = new ArrayList<>();

    int length = str.length();
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        UnicodeBlock block = UnicodeBlock.of(ch);
        // The HANGUL_SYLLABLES block covers U+AC00..U+D7AF, but only U+AC00..U+D7A3 are
        // precomposed syllables. The unassigned tail U+D7A4..U+D7AF would produce a
        // leading-consonant index of 19, beyond the 19 leading consonants (0..18) of the
        // Hangul composition scheme, so those code units are passed through as OTHER.
        if (block == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3) {
            int cho, jung, jong, tmp;
            tmp = ch - 0xAC00;
            cho = tmp / (21 * 28);
            tmp = tmp % (21 * 28);
            jung = tmp / 28;
            jong = tmp % 28;
            result.add(new Pair<>(ChoSung[cho], UnitType.CHOSUNG));
            result.add(new Pair<>(JungSung[jung], UnitType.JUNGSUNG));
            if (jong != 0) {
                result.add(new Pair<>(JongSung[jong], UnitType.JONGSUNG));
            }
        } else {
            result.add(new Pair<>(ch, UnitType.OTHER));
        }
    }
    return result;
}
 
Example 2
Source File: TTUnicodeRange.java    From jpexs-decompiler with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Finds the entry of {@code s_list} whose {@code m_block} equals the Unicode block containing
 * the given code point.
 *
 * @param a_unicode the code point to look up
 * @return the matching range, or {@code null} when the value is not a valid Unicode code point,
 *         when the code point belongs to no Unicode block, or when no entry matches
 */
static public TTUnicodeRange of(long a_unicode) {
    initList();

    // Validate before narrowing: casting a long above the int range would silently wrap to an
    // unrelated (possibly valid) code point, and UnicodeBlock.of(int) throws
    // IllegalArgumentException for anything outside U+0000..U+10FFFF. Both cases are reported
    // as "no range", like a code point that belongs to no block.
    if (a_unicode < Character.MIN_CODE_POINT || a_unicode > Character.MAX_CODE_POINT) {
        return null;
    }

    UnicodeBlock block = UnicodeBlock.of((int) a_unicode);
    if (block == null) {
        return null;
    }

    for (int i = 0; i < s_list.size(); i++) {
        TTUnicodeRange range = s_list.get(i);
        if (range.m_block.equals(block)) {
            return range;
        }
    }

    return null;
}
 
Example 3
Source File: QueryHelper.java    From fess with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the query for a phrase match on field {@code f}.
 *
 * <p>A prefix query is returned only when {@code text} is exactly one {@code char} long, the
 * field is the configured title or content index field, and that character lies in the
 * CJK Unified Ideographs, Hiragana, Katakana or Hangul Syllables block. Every other input
 * gets a match-phrase query.
 *
 * @param f the field name
 * @param text the text to search for; may be {@code null}
 * @return the prefix query or the match-phrase query, as described above
 */
protected QueryBuilder buildMatchPhraseQuery(final String f, final String text) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    final boolean singleChar = text != null && text.length() == 1;
    if (singleChar && (fessConfig.getIndexFieldTitle().equals(f) || fessConfig.getIndexFieldContent().equals(f))) {
        final UnicodeBlock block = UnicodeBlock.of(text.codePointAt(0));
        final boolean ideographOrKana = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == UnicodeBlock.HIRAGANA
                || block == UnicodeBlock.KATAKANA;
        if (ideographOrKana || block == UnicodeBlock.HANGUL_SYLLABLES) {
            return QueryBuilders.prefixQuery(f, text);
        }
    }

    return QueryBuilders.matchPhraseQuery(f, text);
}
 
Example 4
Source File: StringUtil.java    From SoloPi with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the Unicode block of {@code checkChar} is one of the CJK blocks this check
 * treats as Chinese: CJK Unified Ideographs (including Extensions A and B), CJK Compatibility
 * Ideographs, CJK Compatibility Forms, or CJK Radicals Supplement.
 *
 * @param checkChar the UTF-16 code unit to classify
 * @return {@code true} if the character's block is in the list above
 */
private static boolean checkCharContainChinese(char checkChar){
    final UnicodeBlock charBlock = UnicodeBlock.of(checkChar);

    final boolean unifiedIdeograph = charBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || charBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || charBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    final boolean compatibilityOrRadical = charBlock == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || charBlock == UnicodeBlock.CJK_COMPATIBILITY_FORMS
            || charBlock == UnicodeBlock.CJK_RADICALS_SUPPLEMENT;

    return unifiedIdeograph || compatibilityOrRadical;
}
 
Example 5
Source File: ArrowKeyMovementMethod.java    From PowerFileExplorer with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Scans backwards from {@code start} to the beginning of the word containing it.
 *
 * <p>The character at {@code start} is the anchor. When the anchor is in the Basic Latin
 * block, the scan continues over apostrophes, upper/lower/title-case letters, modifier
 * letters and decimal digits. Otherwise it continues while the preceding character is in
 * the same Unicode block as the anchor.
 *
 * @param text the text to scan
 * @param start the index to start from
 * @return the index of the first character of the word, or {@code start} itself when
 *         {@code start} is at or beyond the end of {@code text}
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char previous = text.charAt(pos - 1);
        final boolean partOfWord;
        if (latinAnchor) {
            final int type = Character.getType(previous);
            partOfWord = previous == '\''
                    || type == Character.UPPERCASE_LETTER
                    || type == Character.LOWERCASE_LETTER
                    || type == Character.TITLECASE_LETTER
                    || type == Character.MODIFIER_LETTER
                    || type == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            partOfWord = UnicodeBlock.of(previous) == anchorBlock;
        }
        if (!partOfWord) {
            break;
        }
        pos--;
    }

    return pos;
}
 
Example 6
Source File: ArrowKeyMovementMethod.java    From PowerFileExplorer with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Scans forwards from {@code end} to the position just past the word containing it.
 *
 * <p>The character at {@code end} is the anchor. When the anchor is in the Basic Latin block,
 * the scan continues over apostrophes, upper/lower/title-case letters, modifier letters and
 * decimal digits. Otherwise it continues while each character is in the same Unicode block
 * as the anchor.
 *
 * @param text the text to scan
 * @param end the index to start from
 * @return the index of the first character that does not belong to the word (possibly
 *         {@code text.length()}), or {@code end} itself when {@code end} is at or beyond the
 *         end of {@code text}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int limit = text.length();
    if (end >= limit) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < limit) {
        final char current = text.charAt(pos);
        final boolean partOfWord;
        if (latinAnchor) {
            final int type = Character.getType(current);
            partOfWord = current == '\''
                    || type == Character.UPPERCASE_LETTER
                    || type == Character.LOWERCASE_LETTER
                    || type == Character.TITLECASE_LETTER
                    || type == Character.MODIFIER_LETTER
                    || type == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            partOfWord = UnicodeBlock.of(current) == anchorBlock;
        }
        if (!partOfWord) {
            break;
        }
        pos++;
    }

    return pos;
}
 
Example 7
Source File: JRPdfExporter.java    From jasperreports with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Decides whether the glyph renderer should be used for a text element.
 *
 * <p>Returns {@code true} as soon as one {@code char} of the element's truncated text falls
 * in a Unicode block contained in {@code glyphRendererBlocks}.
 *
 * @param text the print text element to inspect
 * @return {@code true} if any character belongs to a configured glyph renderer block;
 *         {@code false} if the truncated text is {@code null}, no blocks are configured,
 *         or no character matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text)
{
	final String value = styledTextUtil.getTruncatedText(text);
	if (value == null || glyphRendererBlocks.isEmpty())
	{
		return false;
	}

	for (int idx = 0, count = value.length(); idx < count; idx++)
	{
		final UnicodeBlock block = UnicodeBlock.of(value.charAt(idx));
		if (!glyphRendererBlocks.contains(block))
		{
			continue;
		}

		if (log.isTraceEnabled())
		{
			log.trace("found character in block " + block + ", using the glyph renderer");
		}
		return true;
	}

	return false;
}
 
Example 8
Source File: PhoneNumberMatcher.java    From libphonenumber-android with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether a character counts as a Latin-script letter. Combining marks are accepted
 * too, on the assumption that they were attached to a preceding Latin character.
 *
 * @param letter the character to classify
 * @return {@code true} if the character is a letter or non-spacing mark located in one of
 *     the Basic Latin, Latin-1 Supplement, Latin Extended-A, Latin Extended-B, Latin Extended
 *     Additional or Combining Diacritical Marks blocks
 */
// @VisibleForTesting
static boolean isLatinLetter(char letter) {
  // Combining marks are a subset of non-spacing-mark, so that category is let through as well.
  boolean letterOrMark =
      Character.isLetter(letter) || Character.getType(letter) == Character.NON_SPACING_MARK;
  if (!letterOrMark) {
    return false;
  }

  UnicodeBlock block = UnicodeBlock.of(letter);
  if (block.equals(UnicodeBlock.BASIC_LATIN) || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)) {
    return true;
  }
  if (block.equals(UnicodeBlock.LATIN_EXTENDED_A)
      || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
      || block.equals(UnicodeBlock.LATIN_EXTENDED_B)) {
    return true;
  }
  return block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
}
 
Example 9
Source File: KoreanUnitParser.java    From KOMORAN with Apache License 2.0 5 votes vote down vote up
/**
 * Decomposes every precomposed Hangul syllable in {@code str} into its jamo and copies every
 * other character through unchanged.
 *
 * <p>A syllable's offset from U+AC00 is split as {@code cho * (21 * 28) + jung * 28 + jong}.
 * A {@code jong} index of 0 means "no final consonant", so nothing is appended for it.
 *
 * @param str text to decompose; must not be {@code null}
 * @return the decomposed text
 */
@Override
public String parse(String str) {

    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder result = new StringBuilder();

    int length = str.length();
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        UnicodeBlock block = UnicodeBlock.of(ch);
        // The HANGUL_SYLLABLES block covers U+AC00..U+D7AF, but only U+AC00..U+D7A3 are
        // precomposed syllables. The unassigned tail U+D7A4..U+D7AF would produce a
        // leading-consonant index of 19, beyond the 19 leading consonants (0..18) of the
        // Hangul composition scheme, so those code units are copied through unchanged.
        if (block == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3) {
            int cho, jung, jong, tmp;
            tmp = ch - 0xAC00;
            cho = tmp / (21 * 28);
            tmp = tmp % (21 * 28);
            jung = tmp / 28;
            jong = tmp % 28;
            result.append(ChoSung[cho]);
            result.append(JungSung[jung]);
            if (jong != 0) {
                result.append(JongSung[jong]);
            }
        } else {
            result.append(ch);
        }
    }
    return result.toString();
}
 
Example 10
Source File: ArrowKeyMovementMethod.java    From JotaTextEditor with Apache License 2.0 5 votes vote down vote up
/**
 * Walks left from {@code start} and returns where the surrounding word begins.
 *
 * <p>If the character at {@code start} is Basic Latin, the walk passes over apostrophes,
 * letters (upper, lower, title case, modifier) and decimal digits and stops at anything
 * else. For any other anchor it stops at the first character whose Unicode block differs
 * from the anchor's.
 *
 * @param text the text being examined
 * @param start the index the walk begins at
 * @return the word's first index; {@code start} is returned untouched when it is not a
 *         valid index below {@code text.length()}
 */
private static int findWordStart(CharSequence text, int start) {
    if (!(start < text.length())) {
        return start;
    }

    final UnicodeBlock anchor = UnicodeBlock.of(text.charAt(start));
    final boolean basicLatin = (anchor == UnicodeBlock.BASIC_LATIN);

    int cursor = start;
    for (; cursor > 0; cursor--) {
        final char before = text.charAt(cursor - 1);

        if (!basicLatin) {
            // Non-Latin anchor: a word is a run of characters from one Unicode block.
            if (UnicodeBlock.of(before) != anchor) {
                return cursor;
            }
            continue;
        }

        if (before == '\'') {
            continue;
        }
        switch (Character.getType(before)) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.MODIFIER_LETTER:
            case Character.DECIMAL_DIGIT_NUMBER:
                continue;
            default:
                return cursor;
        }
    }

    return cursor;
}
 
Example 11
Source File: ArrowKeyMovementMethod.java    From JotaTextEditor with Apache License 2.0 5 votes vote down vote up
/**
 * Walks right from {@code end} and returns where the surrounding word stops.
 *
 * <p>If the character at {@code end} is Basic Latin, the walk passes over apostrophes,
 * letters (upper, lower, title case, modifier) and decimal digits and stops at anything
 * else. For any other anchor it stops at the first character whose Unicode block differs
 * from the anchor's.
 *
 * @param text the text being examined
 * @param end the index the walk begins at
 * @return the index just past the word's last character; {@code end} is returned untouched
 *         when it is not a valid index below {@code text.length()}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int len = text.length();
    if (!(end < len)) {
        return end;
    }

    final UnicodeBlock anchor = UnicodeBlock.of(text.charAt(end));
    final boolean basicLatin = (anchor == UnicodeBlock.BASIC_LATIN);

    int cursor = end;
    for (; cursor < len; cursor++) {
        final char here = text.charAt(cursor);

        if (!basicLatin) {
            // Non-Latin anchor: a word is a run of characters from one Unicode block.
            if (UnicodeBlock.of(here) != anchor) {
                return cursor;
            }
            continue;
        }

        if (here == '\'') {
            continue;
        }
        switch (Character.getType(here)) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.MODIFIER_LETTER:
            case Character.DECIMAL_DIGIT_NUMBER:
                continue;
            default:
                return cursor;
        }
    }

    return cursor;
}
 
Example 12
Source File: LangDetection.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Classifies a character as Chinese for language detection, based on its Unicode block.
 *
 * <p>Accepted blocks: CJK Unified Ideographs, CJK Unified Ideographs Extension A, CJK
 * Compatibility Ideographs, plus the punctuation blocks General Punctuation, CJK Symbols
 * and Punctuation, and Halfwidth and Fullwidth Forms.
 *
 * @param c the character to classify
 * @return {@code true} if the character's block is one of the accepted blocks
 */
private static boolean isChinese(char c) {
	final UnicodeBlock block = UnicodeBlock.of(c);

	final boolean ideograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
			|| block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
			|| block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A;
	if (ideograph) {
		return true;
	}

	// Punctuation and width-variant blocks are accepted as well.
	return block == UnicodeBlock.GENERAL_PUNCTUATION
			|| block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
			|| block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
}
 
Example 13
Source File: JapaneseLanguageValidator.java    From nomulus with Apache License 2.0 4 votes vote down vote up
/**
 * Validates a label against the Japanese-specific rules, walking it one code point at a time.
 *
 * <p>The label is rejected when:
 * <ul>
 *   <li>a KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK is not immediately preceded by a code point
 *       from the HIRAGANA or KATAKANA block;
 *   <li>it contains KATAKANA_MIDDLE_DOT or IDEOGRAPHIC_CLOSING_MARK but no Japanese code point
 *       outside the exception set;
 *   <li>it contains any Japanese code point and is longer than MAX_LENGTH_JAPANESE_STRING
 *       code points.
 * </ul>
 *
 * @param label the label to validate
 * @return {@code true} if none of the rules above rejects the label
 */
@Override
boolean isValidLabelForLanguage(String label) {
  boolean needsNonExceptionJapanese = false;
  boolean sawJapanese = false;
  boolean sawNonExceptionJapanese = false;
  int codepointCount = 0;
  UnicodeBlock previousBlock = null;

  final int length = label.length();
  int offset = 0;
  while (offset < length) {
    final int codepoint = label.codePointAt(offset);
    final UnicodeBlock currentBlock = UnicodeBlock.of(codepoint);
    final boolean isException = JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint);
    final boolean isJapanese = JAPANESE_UNICODE_BLOCKS.contains(currentBlock);

    // The prolonged sound mark is only legal directly after a HIRAGANA or KATAKANA character.
    if (codepoint == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK) {
      final boolean followsKana =
          Objects.equals(previousBlock, HIRAGANA) || Objects.equals(previousBlock, KATAKANA);
      if (!followsKana) {
        return false;
      }
    }

    // KATAKANA_MIDDLE_DOT and IDEOGRAPHIC_CLOSING_MARK oblige the label to contain a Japanese
    // codepoint that is not itself one of the "exception" codepoints.
    needsNonExceptionJapanese |=
        (codepoint == KATAKANA_MIDDLE_DOT || codepoint == IDEOGRAPHIC_CLOSING_MARK);

    // Track Japanese codepoints, both overall and excluding the "exception" ones. This
    // validator is meant for a Japanese IDN table, where any non-ASCII codepoint should be
    // Japanese anyway; the block membership test is an extra safeguard.
    sawJapanese |= isJapanese;
    sawNonExceptionJapanese |= (isJapanese && !isException);

    // Advance by one or two chars: supplementary codepoints occupy a surrogate pair.
    offset += Character.charCount(codepoint);
    codepointCount++;
    previousBlock = currentBlock;
  }

  // The middle dot / closing mark requirement is not satisfied by "exception" characters.
  if (needsNonExceptionJapanese && !sawNonExceptionJapanese) {
    return false;
  }

  // Labels containing any Japanese character (exceptions included) are capped at
  // MAX_LENGTH_JAPANESE_STRING codepoints.
  return !sawJapanese || codepointCount <= MAX_LENGTH_JAPANESE_STRING;
}
 
Example 14
Source File: SearchScreenOverlay.java    From talkback with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether a character belongs to one of the Japanese syllabaries, i.e. whether its
 * Unicode block is Hiragana or Katakana. Characters that belong to no block yield
 * {@code false}.
 */
static boolean isJapaneseSyllabary(char c) {
  final UnicodeBlock charBlock = UnicodeBlock.of(c);
  if (charBlock == null) {
    return false;
  }
  // UnicodeBlock instances are compared by identity; equals() is final and does the same.
  return charBlock == UnicodeBlock.HIRAGANA || charBlock == UnicodeBlock.KATAKANA;
}