Java Code Examples for java.lang.Character.UnicodeBlock#of()
The following examples show how to use
java.lang.Character.UnicodeBlock#of().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KoreanUnitParser.java From KOMORAN with Apache License 2.0 | 6 votes |
/**
 * Splits {@code str} into typed units, decomposing every precomposed Hangul syllable into its
 * initial consonant, vowel and (optional) final consonant.
 *
 * <p>Characters that are not precomposed Hangul syllables are emitted unchanged with
 * {@link UnitType#OTHER}.
 *
 * @param str the text to decompose; must not be {@code null}
 * @return the units in input order, each paired with its {@link UnitType}
 */
public List<Pair<Character, UnitType>> parseWithType(String str) {
    // Precomposed syllables are laid out from U+AC00 as (cho * 21 + jung) * 28 + jong.
    final int syllableBase = 0xAC00;
    final int syllableLast = 0xD7A3;
    final int jongCount = 28;
    final int jungJongCount = 21 * jongCount;

    List<Pair<Character, UnitType>> result = new ArrayList<>();
    int length = str.length();
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        // The HANGUL_SYLLABLES block extends to U+D7AF, but U+D7A4..U+D7AF are unassigned.
        // Decomposing them yields an initial-consonant index of 19, which is past the 19
        // initial consonants Unicode defines (presumably the size of ChoSung - verify against
        // the table declaration), so they are passed through as OTHER instead.
        boolean isSyllable =
                UnicodeBlock.of(ch) == UnicodeBlock.HANGUL_SYLLABLES && ch <= syllableLast;
        if (!isSyllable) {
            result.add(new Pair<>(ch, UnitType.OTHER));
            continue;
        }

        int offset = ch - syllableBase;
        int cho = offset / jungJongCount;
        int jung = (offset % jungJongCount) / jongCount;
        int jong = offset % jongCount;

        result.add(new Pair<>(ChoSung[cho], UnitType.CHOSUNG));
        result.add(new Pair<>(JungSung[jung], UnitType.JUNGSUNG));
        // Index 0 means the syllable has no final consonant.
        if (jong != 0) {
            result.add(new Pair<>(JongSung[jong], UnitType.JONGSUNG));
        }
    }
    return result;
}
Example 2
Source File: TTUnicodeRange.java From jpexs-decompiler with GNU General Public License v3.0 | 6 votes |
/**
 * Looks up the registered range whose Unicode block contains the given code point.
 *
 * @param a_unicode the Unicode code point to look up
 * @return the matching range, or {@code null} if the code point belongs to no Unicode block or
 *     no registered range uses that block
 * @throws IllegalArgumentException if {@code a_unicode} is not a valid Unicode code point
 */
static public TTUnicodeRange of(long a_unicode) {
    initList();

    // A bare (int) cast would silently wrap large values onto unrelated code points
    // (e.g. 0x100000041L would be treated as 'A'), so reject them the same way
    // UnicodeBlock.of(int) rejects invalid int code points.
    if (a_unicode < Character.MIN_CODE_POINT || a_unicode > Character.MAX_CODE_POINT) {
        throw new IllegalArgumentException("Not a valid Unicode code point: " + a_unicode);
    }

    UnicodeBlock block = UnicodeBlock.of((int) a_unicode);
    if (block == null) {
        return null;
    }

    for (int i = 0; i < s_list.size(); i++) {
        TTUnicodeRange range = s_list.get(i);
        if (range.m_block.equals(block)) {
            return range;
        }
    }
    return null;
}
Example 3
Source File: QueryHelper.java From fess with Apache License 2.0 | 6 votes |
protected QueryBuilder buildMatchPhraseQuery(final String f, final String text) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); if (text == null || text.length() != 1 || (!fessConfig.getIndexFieldTitle().equals(f) && !fessConfig.getIndexFieldContent().equals(f))) { return QueryBuilders.matchPhraseQuery(f, text); } final UnicodeBlock block = UnicodeBlock.of(text.codePointAt(0)); if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS // || block == UnicodeBlock.HIRAGANA // || block == UnicodeBlock.KATAKANA // || block == UnicodeBlock.HANGUL_SYLLABLES // ) { return QueryBuilders.prefixQuery(f, text); } return QueryBuilders.matchPhraseQuery(f, text); }
Example 4
Source File: StringUtil.java From SoloPi with Apache License 2.0 | 5 votes |
/**
 * Reports whether {@code checkChar} lies in one of the CJK ideograph-related Unicode blocks
 * listed below.
 *
 * @param checkChar the char to classify
 * @return {@code true} if the char's Unicode block is one of the listed CJK blocks
 */
private static boolean checkCharContainChinese(char checkChar) {
    final UnicodeBlock actual = UnicodeBlock.of(checkChar);
    final UnicodeBlock[] chineseBlocks = {
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
        UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
        UnicodeBlock.CJK_COMPATIBILITY_FORMS,
        UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    };
    for (UnicodeBlock candidate : chineseBlocks) {
        if (candidate == actual) {
            return true;
        }
    }
    return false;
}
Example 5
Source File: ArrowKeyMovementMethod.java From PowerFileExplorer with GNU General Public License v3.0 | 5 votes |
/**
 * Scans backwards from {@code start} to find where the word containing that position begins.
 *
 * <p>If the char at {@code start} is in the Basic Latin block, the scan continues over
 * apostrophes, letters (upper, lower, title case, modifier) and decimal digits. Otherwise it
 * continues while the preceding char is in the same Unicode block as the char at {@code start}.
 *
 * @param text the text to scan
 * @param start the index to scan back from
 * @return the index of the first char of the word, or {@code start} unchanged when it is at or
 *     beyond the end of {@code text}
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean anchorIsLatin = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char previous = text.charAt(pos - 1);
        boolean belongsToWord;
        if (!anchorIsLatin) {
            belongsToWord = UnicodeBlock.of(previous) == anchorBlock;
        } else if (previous == '\'') {
            belongsToWord = true;
        } else {
            switch (Character.getType(previous)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    belongsToWord = true;
                    break;
                default:
                    belongsToWord = false;
                    break;
            }
        }
        if (!belongsToWord) {
            break;
        }
        pos--;
    }
    return pos;
}
Example 6
Source File: ArrowKeyMovementMethod.java From PowerFileExplorer with GNU General Public License v3.0 | 5 votes |
/**
 * Scans forwards from {@code end} to find where the word containing that position stops.
 *
 * <p>If the char at {@code end} is in the Basic Latin block, the scan continues over
 * apostrophes, letters (upper, lower, title case, modifier) and decimal digits. Otherwise it
 * continues while each char is in the same Unicode block as the char at {@code end}.
 *
 * @param text the text to scan
 * @param end the index to scan forward from
 * @return the index just past the last char of the word, or {@code end} unchanged when it is at
 *     or beyond the end of {@code text}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int limit = text.length();
    if (end >= limit) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean anchorIsLatin = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < limit) {
        final char current = text.charAt(pos);
        boolean belongsToWord;
        if (!anchorIsLatin) {
            belongsToWord = UnicodeBlock.of(current) == anchorBlock;
        } else if (current == '\'') {
            belongsToWord = true;
        } else {
            switch (Character.getType(current)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    belongsToWord = true;
                    break;
                default:
                    belongsToWord = false;
                    break;
            }
        }
        if (!belongsToWord) {
            break;
        }
        pos++;
    }
    return pos;
}
Example 7
Source File: JRPdfExporter.java From jasperreports with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Decides whether the glyph renderer should be used for the given text element.
 *
 * <p>The truncated text is scanned char by char; the glyph renderer is chosen as soon as one
 * char falls in a Unicode block contained in {@code glyphRendererBlocks}.
 *
 * @param text the print text element to inspect
 * @return {@code true} if any char of the truncated text is in a configured block; {@code false}
 *     if the truncated text is {@code null}, no blocks are configured, or nothing matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text) {
    String value = styledTextUtil.getTruncatedText(text);
    if (value == null || glyphRendererBlocks.isEmpty()) {
        return false;
    }

    for (int index = 0, count = value.length(); index < count; index++) {
        UnicodeBlock block = UnicodeBlock.of(value.charAt(index));
        if (!glyphRendererBlocks.contains(block)) {
            continue;
        }
        if (log.isTraceEnabled()) {
            log.trace("found character in block " + block + ", using the glyph renderer");
        }
        return true;
    }
    return false;
}
Example 8
Source File: PhoneNumberMatcher.java From libphonenumber-android with Apache License 2.0 | 5 votes |
/** * Helper method to determine if a character is a Latin-script letter or not. For our purposes, * combining marks should also return true since we assume they have been added to a preceding * Latin character. */ // @VisibleForTesting static boolean isLatinLetter(char letter) { // Combining marks are a subset of non-spacing-mark. if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) { return false; } UnicodeBlock block = UnicodeBlock.of(letter); return block.equals(UnicodeBlock.BASIC_LATIN) || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) || block.equals(UnicodeBlock.LATIN_EXTENDED_A) || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) || block.equals(UnicodeBlock.LATIN_EXTENDED_B) || block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS); }
Example 9
Source File: KoreanUnitParser.java From KOMORAN with Apache License 2.0 | 5 votes |
/**
 * Rewrites {@code str} with every precomposed Hangul syllable replaced by its initial consonant,
 * vowel and (optional) final consonant; all other characters are copied through unchanged.
 *
 * @param str the text to decompose; must not be {@code null}
 * @return the decomposed text
 */
@Override
public String parse(String str) {
    // Precomposed syllables are laid out from U+AC00 as (cho * 21 + jung) * 28 + jong.
    final int syllableBase = 0xAC00;
    final int syllableLast = 0xD7A3;
    final int jongCount = 28;
    final int jungJongCount = 21 * jongCount;

    int length = str.length();
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder result = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        // The HANGUL_SYLLABLES block extends to U+D7AF, but U+D7A4..U+D7AF are unassigned.
        // Decomposing them yields an initial-consonant index of 19, which is past the 19
        // initial consonants Unicode defines (presumably the size of ChoSung - verify against
        // the table declaration), so they are copied through unchanged instead.
        boolean isSyllable =
                UnicodeBlock.of(ch) == UnicodeBlock.HANGUL_SYLLABLES && ch <= syllableLast;
        if (!isSyllable) {
            result.append(ch);
            continue;
        }

        int offset = ch - syllableBase;
        int cho = offset / jungJongCount;
        int jung = (offset % jungJongCount) / jongCount;
        int jong = offset % jongCount;

        result.append(ChoSung[cho]);
        result.append(JungSung[jung]);
        // Index 0 means the syllable has no final consonant.
        if (jong != 0) {
            result.append(JongSung[jong]);
        }
    }
    return result.toString();
}
Example 10
Source File: ArrowKeyMovementMethod.java From JotaTextEditor with Apache License 2.0 | 5 votes |
/**
 * Walks backwards from {@code start} to the beginning of the surrounding word.
 *
 * <p>When the char at {@code start} is Basic Latin, a word consists of apostrophes, letters
 * (upper, lower, title case, modifier) and decimal digits. For any other char, a word is a run
 * of chars sharing that char's Unicode block.
 *
 * @param text the text to scan
 * @param start the index to walk back from
 * @return the word's first index; {@code start} itself if it is not a valid index below
 *     {@code text.length()}
 */
private static int findWordStart(CharSequence text, int start) {
    if (start < text.length()) {
        final UnicodeBlock startBlock = UnicodeBlock.of(text.charAt(start));
        if (startBlock == UnicodeBlock.BASIC_LATIN) {
            while (start > 0) {
                final char ch = text.charAt(start - 1);
                final int type = Character.getType(ch);
                final boolean wordChar = ch == '\''
                        || type == Character.UPPERCASE_LETTER
                        || type == Character.LOWERCASE_LETTER
                        || type == Character.TITLECASE_LETTER
                        || type == Character.MODIFIER_LETTER
                        || type == Character.DECIMAL_DIGIT_NUMBER;
                if (!wordChar) {
                    break;
                }
                start--;
            }
        } else {
            while (start > 0 && UnicodeBlock.of(text.charAt(start - 1)) == startBlock) {
                start--;
            }
        }
    }
    return start;
}
Example 11
Source File: ArrowKeyMovementMethod.java From JotaTextEditor with Apache License 2.0 | 5 votes |
/**
 * Walks forwards from {@code end} to the end of the surrounding word.
 *
 * <p>When the char at {@code end} is Basic Latin, a word consists of apostrophes, letters
 * (upper, lower, title case, modifier) and decimal digits. For any other char, a word is a run
 * of chars sharing that char's Unicode block.
 *
 * @param text the text to scan
 * @param end the index to walk forward from
 * @return the index one past the word's last char; {@code end} itself if it is not a valid
 *     index below {@code text.length()}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int len = text.length();
    if (end < len) {
        final UnicodeBlock startBlock = UnicodeBlock.of(text.charAt(end));
        if (startBlock == UnicodeBlock.BASIC_LATIN) {
            while (end < len) {
                final char ch = text.charAt(end);
                final int type = Character.getType(ch);
                final boolean wordChar = ch == '\''
                        || type == Character.UPPERCASE_LETTER
                        || type == Character.LOWERCASE_LETTER
                        || type == Character.TITLECASE_LETTER
                        || type == Character.MODIFIER_LETTER
                        || type == Character.DECIMAL_DIGIT_NUMBER;
                if (!wordChar) {
                    break;
                }
                end++;
            }
        } else {
            while (end < len && UnicodeBlock.of(text.charAt(end)) == startBlock) {
                end++;
            }
        }
    }
    return end;
}
Example 12
Source File: LangDetection.java From fnlp with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Reports whether {@code c} is in one of the Unicode blocks this detector counts as Chinese
 * text: CJK ideographs, general punctuation, CJK symbols and punctuation, and
 * half-width/full-width forms.
 *
 * @param c the char to classify
 * @return {@code true} if the char's Unicode block is one of those blocks
 */
private static boolean isChinese(char c) {
    final UnicodeBlock block = UnicodeBlock.of(c);
    return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || block == UnicodeBlock.GENERAL_PUNCTUATION
            || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
}
Example 13
Source File: JapaneseLanguageValidator.java From nomulus with Apache License 2.0 | 4 votes |
@Override boolean isValidLabelForLanguage(String label) { boolean requiresJapaneseNonExceptionCodepoint = false; boolean hasJapaneseCodepoint = false; boolean hasJapaneseNonExceptionCodepoint = false; final int length = label.length(); int codepoints = 0; UnicodeBlock precedingUnicodeBlock = null; for (int i = 0; i < length; ) { int codepoint = label.codePointAt(i); UnicodeBlock unicodeBlock = UnicodeBlock.of(codepoint); boolean isException = JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint); boolean isJapanese = JAPANESE_UNICODE_BLOCKS.contains(unicodeBlock); // A label containing KATAKANA_MIDDLE_DOT or IDEOGRAPHIC_CLOSING_MARK requires a Japanese // language codepoint to also appear in the label. if (codepoint == KATAKANA_MIDDLE_DOT || codepoint == IDEOGRAPHIC_CLOSING_MARK) { requiresJapaneseNonExceptionCodepoint = true; } // The KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK can only occur after a HIRAGANA or KATAKANA // character. if (codepoint == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK && !Objects.equals(precedingUnicodeBlock, HIRAGANA) && !Objects.equals(precedingUnicodeBlock, KATAKANA)) { return false; } // If a codepoint is Japanese but not an "exception" codepoint, then it must a non-exception // Japanese codepoint. if (isJapanese && !isException) { hasJapaneseNonExceptionCodepoint = true; } // Make a note if we've seen any Japanese codepoint. Note that this object should really only // be used on a Japanese IDN table, and thus any non-ASCII codepoint should really be // Japanese. But we do the additional check again the characters UnicodeBlock just in case. if (isJapanese) { hasJapaneseCodepoint = true; } // Some codepoints take up more than one character in Java strings (e.g. high and low // surrogates). i += Character.charCount(codepoint); ++codepoints; precedingUnicodeBlock = unicodeBlock; } // A label with the KATAKANA MIDDLE DOT or IDEOGRAPHIC_CLOSING_MARK codepoint must also have // some Japanese character in the label. 
The Japanese "exception" characters do not count in // this regard. if (requiresJapaneseNonExceptionCodepoint && !hasJapaneseNonExceptionCodepoint) { return false; } // Any label with Japanese characters (including "exception" characters) can only be 15 // codepoints long. return !(hasJapaneseCodepoint && (codepoints > MAX_LENGTH_JAPANESE_STRING)); }
Example 14
Source File: SearchScreenOverlay.java From talkback with Apache License 2.0 | 4 votes |
/**
 * Determines whether the specified character belongs to a Japanese syllabary, i.e. lies in the
 * Hiragana or Katakana Unicode block.
 *
 * @param c the char to classify
 * @return {@code true} for Hiragana and Katakana chars; {@code false} otherwise, including
 *     chars that belong to no Unicode block
 */
static boolean isJapaneseSyllabary(char c) {
  final UnicodeBlock block = UnicodeBlock.of(c);
  if (block == null) {
    return false;
  }
  if (block.equals(UnicodeBlock.HIRAGANA)) {
    return true;
  }
  return block.equals(UnicodeBlock.KATAKANA);
}