java.lang.Character.UnicodeBlock#of

Source File: KoreanUnitParser.java From KOMORAN with Apache License 2.0

6 votes

/**
 * Splits {@code str} into a list of (character, unit type) pairs.
 *
 * <p>Each precomposed Hangul syllable is decomposed arithmetically into an initial
 * ({@code CHOSUNG}), a medial ({@code JUNGSUNG}) and, when present, a final
 * ({@code JONGSUNG}) unit looked up in the {@code ChoSung}/{@code JungSung}/{@code JongSung}
 * tables. Every other character is emitted unchanged with type {@code OTHER}.
 *
 * @param str text to decompose; must not be {@code null}
 * @return the units in input order
 */
public List<Pair<Character, UnitType>> parseWithType(String str) {
    List<Pair<Character, UnitType>> result = new ArrayList<>();

    int length = str.length();
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        UnicodeBlock block = UnicodeBlock.of(ch);
        // The HANGUL_SYLLABLES block covers U+AC00..U+D7AF, but only U+AC00..U+D7A3 are
        // assigned syllables. For U+D7A4..U+D7AF the initial index below would be 19, one
        // past the 19 standard initials (NOTE(review): assumes ChoSung holds exactly those
        // 19 entries - confirm), so such characters are passed through as OTHER instead.
        if (block == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3) {
            int syllableIndex = ch - 0xAC00;
            // Syllables are laid out as initial * (21 * 28) + medial * 28 + final.
            int cho = syllableIndex / (21 * 28);
            int jung = (syllableIndex % (21 * 28)) / 28;
            int jong = syllableIndex % 28;
            result.add(new Pair<>(ChoSung[cho], UnitType.CHOSUNG));
            result.add(new Pair<>(JungSung[jung], UnitType.JUNGSUNG));
            // Final index 0 means the syllable has no final consonant.
            if (jong != 0) {
                result.add(new Pair<>(JongSung[jong], UnitType.JONGSUNG));
            }
        } else {
            result.add(new Pair<>(ch, UnitType.OTHER));
        }
    }
    return result;
}

Source File: TTUnicodeRange.java From jpexs-decompiler with GNU General Public License v3.0

6 votes

/**
 * Finds the registered range whose Unicode block contains the given value.
 *
 * @param a_unicode the Unicode value to classify
 * @return the matching range, or {@code null} when the value is not a valid code point,
 *         belongs to no defined Unicode block, or no registered range uses that block
 */
static public TTUnicodeRange of(long a_unicode) {
    initList();

    // Reject values outside U+0000..U+10FFFF before narrowing to int: the cast would
    // otherwise silently truncate large values onto unrelated code points, and
    // UnicodeBlock.of(int) throws IllegalArgumentException for invalid code points.
    if (a_unicode < Character.MIN_CODE_POINT || a_unicode > Character.MAX_CODE_POINT) {
        return null;
    }

    UnicodeBlock block = UnicodeBlock.of((int) a_unicode);
    if (block == null) {
        return null;
    }

    for (TTUnicodeRange range : s_list) {
        if (range.m_block.equals(block)) {
            return range;
        }
    }

    return null;
}

Source File: QueryHelper.java From fess with Apache License 2.0

6 votes

/**
 * Builds the query for a phrase term on field {@code f}.
 *
 * <p>A prefix query is returned only when {@code text} is exactly one char long, the field
 * is the configured title or content field, and the character belongs to the CJK Unified
 * Ideographs, Hiragana, Katakana or Hangul Syllables block. In every other case a
 * match-phrase query is returned.
 *
 * @param f the index field name
 * @param text the phrase text, may be {@code null}
 * @return the query builder for the term
 */
protected QueryBuilder buildMatchPhraseQuery(final String f, final String text) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    final boolean singleChar = text != null && text.length() == 1;
    if (singleChar) {
        final boolean titleOrContent =
                fessConfig.getIndexFieldTitle().equals(f) || fessConfig.getIndexFieldContent().equals(f);
        if (titleOrContent) {
            final UnicodeBlock charBlock = UnicodeBlock.of(text.codePointAt(0));
            final boolean eastAsianScript = charBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                    || charBlock == UnicodeBlock.HIRAGANA
                    || charBlock == UnicodeBlock.KATAKANA
                    || charBlock == UnicodeBlock.HANGUL_SYLLABLES;
            if (eastAsianScript) {
                return QueryBuilders.prefixQuery(f, text);
            }
        }
    }

    return QueryBuilders.matchPhraseQuery(f, text);
}

Source File: StringUtil.java From SoloPi with Apache License 2.0

5 votes

/**
 * Reports whether {@code checkChar} lies in one of the CJK blocks this utility treats as
 * Chinese: unified ideographs (including extensions A and B), compatibility ideographs,
 * compatibility forms, or the radicals supplement.
 *
 * @param checkChar the UTF-16 char to classify
 * @return {@code true} when the char's Unicode block is one of the listed CJK blocks
 */
private static boolean checkCharContainChinese(char checkChar){
    final UnicodeBlock block = UnicodeBlock.of(checkChar);
    return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
            || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
}

Source File: ArrowKeyMovementMethod.java From PowerFileExplorer with GNU General Public License v3.0

5 votes

/**
 * Scans backwards from {@code start} to the beginning of the word containing it.
 *
 * <p>When the char at {@code start} is Basic Latin, the word extends over apostrophes,
 * letters (upper, lower, title case, modifier) and decimal digits. Otherwise it extends
 * over chars that share the Unicode block of the char at {@code start}.
 *
 * @param text the text to scan
 * @param start index inside the word; returned unchanged when it is at or past the end
 * @return index of the first char of the word
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char prev = text.charAt(pos - 1);
        final boolean partOfWord;
        if (latinAnchor) {
            final int category = Character.getType(prev);
            partOfWord = prev == '\''
                    || category == Character.UPPERCASE_LETTER
                    || category == Character.LOWERCASE_LETTER
                    || category == Character.TITLECASE_LETTER
                    || category == Character.MODIFIER_LETTER
                    || category == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            partOfWord = UnicodeBlock.of(prev) == anchorBlock;
        }
        if (!partOfWord) {
            break;
        }
        pos--;
    }

    return pos;
}

Source File: ArrowKeyMovementMethod.java From PowerFileExplorer with GNU General Public License v3.0

5 votes

/**
 * Scans forwards from {@code end} to the end of the word containing it.
 *
 * <p>When the char at {@code end} is Basic Latin, the word extends over apostrophes,
 * letters (upper, lower, title case, modifier) and decimal digits. Otherwise it extends
 * over chars that share the Unicode block of the char at {@code end}.
 *
 * @param text the text to scan
 * @param end index inside the word; returned unchanged when it is at or past the end
 * @return index just past the last char of the word
 */
private static int findWordEnd(CharSequence text, int end) {
    final int length = text.length();
    if (end >= length) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < length) {
        final char current = text.charAt(pos);
        final boolean partOfWord;
        if (latinAnchor) {
            final int category = Character.getType(current);
            partOfWord = current == '\''
                    || category == Character.UPPERCASE_LETTER
                    || category == Character.LOWERCASE_LETTER
                    || category == Character.TITLECASE_LETTER
                    || category == Character.MODIFIER_LETTER
                    || category == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            partOfWord = UnicodeBlock.of(current) == anchorBlock;
        }
        if (!partOfWord) {
            break;
        }
        pos++;
    }

    return pos;
}

Source File: JRPdfExporter.java From jasperreports with GNU Lesser General Public License v3.0

5 votes

/**
 * Decides whether the glyph renderer should be used for a text element.
 *
 * <p>Returns {@code true} as soon as any char of the element's truncated text belongs to a
 * Unicode block contained in {@code glyphRendererBlocks}.
 *
 * @param text the print text element to inspect
 * @return {@code true} if a char falls in one of the configured blocks; {@code false} when
 *         the truncated text is {@code null}, no blocks are configured, or nothing matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text)
{
	final String truncated = styledTextUtil.getTruncatedText(text);
	if (truncated == null || glyphRendererBlocks.isEmpty())
	{
		return false;
	}
	
	final int count = truncated.length();
	for (int idx = 0; idx < count; idx++)
	{
		final UnicodeBlock block = UnicodeBlock.of(truncated.charAt(idx));
		if (!glyphRendererBlocks.contains(block))
		{
			continue;
		}
		
		if (log.isTraceEnabled())
		{
			log.trace("found character in block " + block + ", using the glyph renderer");
		}
		return true;
	}
	
	return false;
}

Source File: PhoneNumberMatcher.java From libphonenumber-android with Apache License 2.0

5 votes

/**
 * Tells whether a character counts as a Latin-script letter. Non-spacing marks located in
 * the Combining Diacritical Marks block are accepted as well, on the assumption that they
 * modify a preceding Latin character.
 *
 * @param letter the character to classify
 * @return {@code true} for letters or non-spacing marks whose Unicode block is one of the
 *         Latin blocks or Combining Diacritical Marks
 */
// @VisibleForTesting
static boolean isLatinLetter(char letter) {
  // Combining marks are a subset of non-spacing-mark.
  final boolean letterOrMark =
      Character.isLetter(letter) || Character.getType(letter) == Character.NON_SPACING_MARK;
  if (!letterOrMark) {
    return false;
  }

  final UnicodeBlock block = UnicodeBlock.of(letter);
  if (block.equals(UnicodeBlock.BASIC_LATIN) || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)) {
    return true;
  }
  if (block.equals(UnicodeBlock.LATIN_EXTENDED_A) || block.equals(UnicodeBlock.LATIN_EXTENDED_B)) {
    return true;
  }
  return block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
      || block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
}

Source File: KoreanUnitParser.java From KOMORAN with Apache License 2.0

5 votes

/**
 * Rewrites {@code str} with every precomposed Hangul syllable replaced by its units from the
 * {@code ChoSung}, {@code JungSung} and (when present) {@code JongSung} tables. All other
 * characters are copied through unchanged.
 *
 * @param str text to decompose; must not be {@code null}
 * @return the decomposed text
 */
@Override
public String parse(String str) {

    // Local, single-threaded accumulation: StringBuilder avoids StringBuffer's locking.
    StringBuilder result = new StringBuilder();

    int length = str.length();
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        UnicodeBlock block = UnicodeBlock.of(ch);
        // The HANGUL_SYLLABLES block covers U+AC00..U+D7AF, but only U+AC00..U+D7A3 are
        // assigned syllables. For U+D7A4..U+D7AF the initial index below would be 19, one
        // past the 19 standard initials (NOTE(review): assumes ChoSung holds exactly those
        // 19 entries - confirm), so such characters are copied through unchanged instead.
        if (block == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3) {
            int syllableIndex = ch - 0xAC00;
            // Syllables are laid out as initial * (21 * 28) + medial * 28 + final.
            int cho = syllableIndex / (21 * 28);
            int jung = (syllableIndex % (21 * 28)) / 28;
            int jong = syllableIndex % 28;
            result.append(ChoSung[cho]);
            result.append(JungSung[jung]);
            // Final index 0 means the syllable has no final consonant.
            if (jong != 0) {
                result.append(JongSung[jong]);
            }
        } else {
            result.append(ch);
        }
    }
    return result.toString();
}

Source File: ArrowKeyMovementMethod.java From JotaTextEditor with Apache License 2.0

5 votes

/**
 * Walks left from {@code start} and returns where the surrounding word begins.
 *
 * <p>For a Basic Latin char at {@code start}, a word is a run of apostrophes, letters
 * (upper, lower, title case, modifier) and decimal digits. For any other char, a word is a
 * run of chars in the same Unicode block as the char at {@code start}.
 *
 * @param text the text to scan
 * @param start index inside the word; returned as-is when it is not below the text length
 * @return index of the first char of the word
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock originBlock = UnicodeBlock.of(text.charAt(start));
    final boolean originIsLatin = originBlock == UnicodeBlock.BASIC_LATIN;

    int cursor = start;
    while (cursor > 0) {
        final char before = text.charAt(cursor - 1);
        final boolean belongs;
        if (originIsLatin) {
            final int kind = Character.getType(before);
            belongs = before == '\''
                    || kind == Character.UPPERCASE_LETTER
                    || kind == Character.LOWERCASE_LETTER
                    || kind == Character.TITLECASE_LETTER
                    || kind == Character.MODIFIER_LETTER
                    || kind == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            belongs = UnicodeBlock.of(before) == originBlock;
        }
        if (!belongs) {
            break;
        }
        cursor--;
    }

    return cursor;
}

Source File: ArrowKeyMovementMethod.java From JotaTextEditor with Apache License 2.0

5 votes

/**
 * Walks right from {@code end} and returns where the surrounding word stops.
 *
 * <p>For a Basic Latin char at {@code end}, a word is a run of apostrophes, letters
 * (upper, lower, title case, modifier) and decimal digits. For any other char, a word is a
 * run of chars in the same Unicode block as the char at {@code end}.
 *
 * @param text the text to scan
 * @param end index inside the word; returned as-is when it is not below the text length
 * @return index just past the last char of the word
 */
private static int findWordEnd(CharSequence text, int end) {
    final int limit = text.length();
    if (end >= limit) {
        return end;
    }

    final UnicodeBlock originBlock = UnicodeBlock.of(text.charAt(end));
    final boolean originIsLatin = originBlock == UnicodeBlock.BASIC_LATIN;

    int cursor = end;
    while (cursor < limit) {
        final char here = text.charAt(cursor);
        final boolean belongs;
        if (originIsLatin) {
            final int kind = Character.getType(here);
            belongs = here == '\''
                    || kind == Character.UPPERCASE_LETTER
                    || kind == Character.LOWERCASE_LETTER
                    || kind == Character.TITLECASE_LETTER
                    || kind == Character.MODIFIER_LETTER
                    || kind == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            belongs = UnicodeBlock.of(here) == originBlock;
        }
        if (!belongs) {
            break;
        }
        cursor++;
    }

    return cursor;
}

Source File: LangDetection.java From fnlp with GNU Lesser General Public License v3.0

5 votes

/**
 * Reports whether {@code c} falls in a Unicode block this detector counts as Chinese text.
 * Besides the CJK ideograph blocks, the accepted set includes General Punctuation, CJK
 * Symbols and Punctuation, and Halfwidth and Fullwidth Forms.
 *
 * @param c the UTF-16 char to classify
 * @return {@code true} when the char's block is one of the accepted blocks
 */
private static boolean isChinese(char c) {
	final UnicodeBlock block = UnicodeBlock.of(c);
	final boolean ideograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
		|| block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
		|| block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A;
	final boolean punctuationOrWidthForm = block == UnicodeBlock.GENERAL_PUNCTUATION
		|| block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
		|| block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
	return ideograph || punctuationOrWidthForm;
}

Source File: JapaneseLanguageValidator.java From nomulus with Apache License 2.0

4 votes

/**
 * Validates a label against the Japanese-specific rules implemented here:
 *
 * <ul>
 *   <li>a KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK must directly follow a codepoint from the
 *       HIRAGANA or KATAKANA block;
 *   <li>a label containing KATAKANA_MIDDLE_DOT or IDEOGRAPHIC_CLOSING_MARK must also contain
 *       a Japanese codepoint that is not one of the "exception" codepoints;
 *   <li>a label containing any Japanese codepoint may be at most
 *       MAX_LENGTH_JAPANESE_STRING codepoints long.
 * </ul>
 *
 * @param label the label to check
 * @return {@code true} if the label satisfies all of the rules above
 */
@Override
boolean isValidLabelForLanguage(String label) {
  boolean needsNonExceptionJapanese = false;
  boolean sawJapanese = false;
  boolean sawNonExceptionJapanese = false;
  int codepointCount = 0;
  UnicodeBlock previousBlock = null;

  final int charLength = label.length();
  int offset = 0;
  while (offset < charLength) {
    final int codepoint = label.codePointAt(offset);
    final UnicodeBlock currentBlock = UnicodeBlock.of(codepoint);
    final boolean isException = JAPANESE_EXCEPTION_CODEPOINTS.contains(codepoint);
    final boolean isJapanese = JAPANESE_UNICODE_BLOCKS.contains(currentBlock);

    // These two marks oblige the label to carry a real (non-exception) Japanese codepoint.
    if (codepoint == KATAKANA_MIDDLE_DOT || codepoint == IDEOGRAPHIC_CLOSING_MARK) {
      needsNonExceptionJapanese = true;
    }

    // The prolonged sound mark is only legal right after a HIRAGANA or KATAKANA codepoint.
    if (codepoint == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK) {
      final boolean followsKana =
          Objects.equals(previousBlock, HIRAGANA) || Objects.equals(previousBlock, KATAKANA);
      if (!followsKana) {
        return false;
      }
    }

    // Track whether any Japanese codepoint, and any non-exception one, has been seen. The
    // block check is a safeguard: on a Japanese IDN table every non-ASCII codepoint is
    // expected to be Japanese anyway.
    if (isJapanese) {
      sawJapanese = true;
      if (!isException) {
        sawNonExceptionJapanese = true;
      }
    }

    // Supplementary codepoints occupy two chars (a surrogate pair) in a Java string.
    offset += Character.charCount(codepoint);
    codepointCount++;
    previousBlock = currentBlock;
  }

  // Exception codepoints alone do not satisfy the middle-dot / closing-mark requirement.
  if (needsNonExceptionJapanese && !sawNonExceptionJapanese) {
    return false;
  }

  // The length cap applies only when the label contains Japanese codepoints (exception
  // codepoints included).
  return !sawJapanese || codepointCount <= MAX_LENGTH_JAPANESE_STRING;

}

Source File: SearchScreenOverlay.java From talkback with Apache License 2.0

4 votes

/**
 * Checks whether the given character is kana, i.e. belongs to the Hiragana or Katakana
 * Unicode block.
 */
static boolean isJapaneseSyllabary(char c) {
  final UnicodeBlock block = UnicodeBlock.of(c);
  if (block == null) {
    return false;
  }
  return block.equals(UnicodeBlock.HIRAGANA) || block.equals(UnicodeBlock.KATAKANA);
}

Java Code Examples for java.lang.Character.UnicodeBlock#of()