com.ibm.icu.lang.UProperty Java Examples
The following examples show how to use
com.ibm.icu.lang.UProperty.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UnicodeDataTest.java From es6draft with MIT License | 6 votes |
@SuppressWarnings("deprecation") @Test public void testAllICUBinaryProperties() { for (int p = UProperty.BINARY_START; p < UProperty.BINARY_LIMIT; ++p) { String shortName = UCharacter.getPropertyName(p, UProperty.NameChoice.SHORT); if (shortName != null) { // Does not throw. isBinaryProperty(shortName); } String longName = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG); if (longName != null) { // Does not throw. isBinaryProperty(longName); } } }
Example #2
Source File: ICUTokenizerFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a new ICUTokenizerFactory.
 *
 * <p>When the {@code RULEFILES} argument is present it is split into entries of
 * the form {@code script:resourcePath}. For each entry the script code is
 * resolved to its ICU script enum and stored in {@code tailored} together with
 * the trimmed resource path.
 *
 * <p>{@code cjkAsWords} and {@code myanmarAsWords} both default to {@code true}.
 *
 * @param args factory arguments; anything still left in the map once the known
 *             options have been read is reported as an unknown parameter
 * @throws IllegalArgumentException if a rule-file entry lacks the ':' separator,
 *             or if unknown parameters remain in {@code args}
 */
public ICUTokenizerFactory(Map<String,String> args) {
  super(args);
  tailored = new HashMap<>();
  String rulefilesArg = get(args, RULEFILES);
  if (rulefilesArg != null) {
    List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
    for (String scriptAndResourcePath : scriptAndResourcePaths) {
      int colonPos = scriptAndResourcePath.indexOf(':');
      if (colonPos < 0) {
        // Without this guard substring(0, -1) would fail with an opaque
        // StringIndexOutOfBoundsException that does not name the bad entry.
        throw new IllegalArgumentException(
            "Invalid rule file entry (expected 'script:resourcePath'): " + scriptAndResourcePath);
      }
      String scriptCode = scriptAndResourcePath.substring(0, colonPos).trim();
      String resourcePath = scriptAndResourcePath.substring(colonPos + 1).trim();
      tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), resourcePath);
    }
  }
  cjkAsWords = getBoolean(args, "cjkAsWords", true);
  myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Example #3
Source File: UBiDiProps.java From fitnotifications with Apache License 2.0 | 6 votes |
/**
 * Extracts the maximum value of one of the supported properties from the
 * packed word stored at {@code indexes[IX_MAX_VALUES]}.
 *
 * @param which the {@code UProperty} selector
 * @return the unpacked maximum for {@code BIDI_CLASS}, {@code JOINING_GROUP},
 *         {@code JOINING_TYPE} or {@code BIDI_PAIRED_BRACKET_TYPE};
 *         -1 for any other selector
 */
public final int getMaxValue(int which) {
    final int packed = indexes[IX_MAX_VALUES];
    if (which == UProperty.BIDI_CLASS) {
        return packed & CLASS_MASK;
    }
    if (which == UProperty.JOINING_GROUP) {
        return (packed & MAX_JG_MASK) >> MAX_JG_SHIFT;
    }
    if (which == UProperty.JOINING_TYPE) {
        return (packed & JT_MASK) >> JT_SHIFT;
    }
    if (which == UProperty.BIDI_PAIRED_BRACKET_TYPE) {
        return (packed & BPT_MASK) >> BPT_SHIFT;
    }
    return -1; /* undefined */
}
Example #4
Source File: CollationRuleParser.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Gets a script or reorder code from its string representation. * @return the script/reorder code, or * -1 if not recognized */ public static int getReorderCode(String word) { for(int i = 0; i < gSpecialReorderCodes.length; ++i) { if(word.equalsIgnoreCase(gSpecialReorderCodes[i])) { return Collator.ReorderCodes.FIRST + i; } } try { int script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word); if(script >= 0) { return script; } } catch (IllegalIcuArgumentException e) { // fall through } if(word.equalsIgnoreCase("others")) { return Collator.ReorderCodes.OTHERS; // same as Zzzz = USCRIPT_UNKNOWN } return -1; }
Example #5
Source File: UBiDiProps.java From trekarta with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the largest value of the requested property, decoded from the
 * combined max-values word at {@code indexes[IX_MAX_VALUES]}.
 *
 * @param which property selector (a {@code UProperty} constant)
 * @return the decoded maximum, or -1 if {@code which} is not one of the
 *         four properties handled by the switch
 */
public final int getMaxValue(int which) {
    final int maxValues = indexes[IX_MAX_VALUES];
    final int result;
    switch (which) {
    case UProperty.BIDI_CLASS:
        result = maxValues & CLASS_MASK;
        break;
    case UProperty.JOINING_GROUP:
        result = (maxValues & MAX_JG_MASK) >> MAX_JG_SHIFT;
        break;
    case UProperty.JOINING_TYPE:
        result = (maxValues & JT_MASK) >> JT_SHIFT;
        break;
    case UProperty.BIDI_PAIRED_BRACKET_TYPE:
        result = (maxValues & BPT_MASK) >> BPT_SHIFT;
        break;
    default:
        result = -1; /* undefined */
        break;
    }
    return result;
}
Example #6
Source File: PrintUtils.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks whether a code point is full width according to Unicode Standard
 * version 12.0.0. See http://unicode.org/reports/tr11/
 *
 * @param codePoint the code point to classify
 * @return {@code true} for the East Asian Width values FULLWIDTH and WIDE,
 *         {@code false} for NEUTRAL, AMBIGUOUS, HALFWIDTH and NARROW
 * @throws RuntimeException if ICU reports any other width value
 */
public static boolean isFullWidth(int codePoint) {
    final int width = UCharacter.getIntPropertyValue(codePoint, UProperty.EAST_ASIAN_WIDTH);
    switch (width) {
        case UCharacter.EastAsianWidth.FULLWIDTH:
        case UCharacter.EastAsianWidth.WIDE:
            return true;
        case UCharacter.EastAsianWidth.NEUTRAL:
        case UCharacter.EastAsianWidth.AMBIGUOUS:
        case UCharacter.EastAsianWidth.HALFWIDTH:
        case UCharacter.EastAsianWidth.NARROW:
            return false;
        default:
            throw new RuntimeException("unknown UProperty.EAST_ASIAN_WIDTH: " + width);
    }
}
Example #7
Source File: UnicodeData.java From es6draft with MIT License | 6 votes |
public boolean isValue(String valueAlias) { // Don't allow loose matching. try { int value = UCharacter.getPropertyValueEnum(propertyId, valueAlias); String shortName = UCharacter.getPropertyValueName(propertyId, value, UProperty.NameChoice.SHORT); if (shortName != null && shortName.equals(valueAlias)) { return true; } for (int i = 0;; ++i) { String longName = UCharacter.getPropertyValueName(propertyId, value, UProperty.NameChoice.LONG + i); if (longName != null && longName.equals(valueAlias)) { return true; } } } catch (IllegalArgumentException e) { return false; } }
Example #8
Source File: UnicodeData.java From es6draft with MIT License | 5 votes |
static Property from(String name) { // Don't allow loose matching. int property; CHECK: try { property = UCharacter.getPropertyEnum(name); // Filter out synthetic names. if (property == UProperty.GENERAL_CATEGORY_MASK) { return null; } String shortName = UCharacter.getPropertyName(property, UProperty.NameChoice.SHORT); if (shortName != null && shortName.equals(name)) { break CHECK; } for (int i = 0;; ++i) { String longName = UCharacter.getPropertyName(property, UProperty.NameChoice.LONG + i); if (longName != null && longName.equals(name)) { break CHECK; } } } catch (IllegalArgumentException e) { return null; } if (property >= UProperty.BINARY_START && property < BINARY_PROPERTY_LIMIT) { return BinaryProperty.forId(property); } return EnumProperty.forId(property); }
Example #9
Source File: UnicodeData.java From es6draft with MIT License | 5 votes |
/**
 * Finds the {@code BinaryProperty} constant carrying the given ICU id.
 *
 * @param propertyId the ICU property id
 * @return the matching constant, or {@code null} if the id lies outside
 *         {@code [UProperty.BINARY_START, BINARY_PROPERTY_LIMIT)} or no
 *         constant has that id
 */
static BinaryProperty forId(int propertyId) {
    if (propertyId < UProperty.BINARY_START || propertyId >= BINARY_PROPERTY_LIMIT) {
        return null;
    }
    for (BinaryProperty candidate : BinaryProperty.values()) {
        if (candidate.propertyId == propertyId) {
            return candidate;
        }
    }
    return null;
}
Example #10
Source File: UCharacterProperty.java From trekarta with GNU General Public License v3.0 | 5 votes |
public int getIntPropertyMaxValue(int which) { if(which<UProperty.INT_START) { if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) { return 1; // maximum TRUE for all binary properties } } else if(which<UProperty.INT_LIMIT) { return intProps[which-UProperty.INT_START].getMaxValue(which); } return -1; // undefined }
Example #11
Source File: UCharacterProperty.java From trekarta with GNU General Public License v3.0 | 5 votes |
public int getIntPropertyValue(int c, int which) { if(which<UProperty.INT_START) { if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) { return binProps[which].contains(c) ? 1 : 0; } } else if(which<UProperty.INT_LIMIT) { return intProps[which-UProperty.INT_START].getValue(c); } else if (which == UProperty.GENERAL_CATEGORY_MASK) { return getMask(getType(c)); } return 0; // undefined }
Example #12
Source File: UCharacterProperty.java From trekarta with GNU General Public License v3.0 | 5 votes |
public boolean hasBinaryProperty(int c, int which) { if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) { // not a known binary property return false; } else { return binProps[which].contains(c); } }
Example #13
Source File: UPropertyAliases.java From trekarta with GNU General Public License v3.0 | 5 votes |
/** * Returns a value enum given a property enum and one of its value names. Does not throw. * @return value enum, or UProperty.UNDEFINED if not defined for that property */ public int getPropertyValueEnumNoThrow(int property, CharSequence alias) { int valueMapIndex=findProperty(property); if(valueMapIndex==0) { return UProperty.UNDEFINED; } valueMapIndex=valueMaps[valueMapIndex+1]; if(valueMapIndex==0) { return UProperty.UNDEFINED; } // valueMapIndex is the start of the property's valueMap, // where the first word is the BytesTrie offset. return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); }
Example #14
Source File: UnicodeData.java From es6draft with MIT License | 5 votes |
/**
 * Finds the {@code EnumProperty} constant carrying the given ICU id.
 *
 * @param propertyId the ICU property id
 * @return {@code null} unless the id lies in
 *         {@code [UProperty.INT_START, INT_PROPERTY_LIMIT)} or equals
 *         {@code GENERAL_CATEGORY_MASK} or {@code SCRIPT_EXTENSIONS};
 *         {@code General_Category} for {@code UProperty.GENERAL_CATEGORY};
 *         otherwise the first constant whose id matches, or {@code null}
 */
static EnumProperty forId(int propertyId) {
    final boolean inIntRange = propertyId >= UProperty.INT_START && propertyId < INT_PROPERTY_LIMIT;
    final boolean supported = inIntRange
            || propertyId == UProperty.GENERAL_CATEGORY_MASK
            || propertyId == UProperty.SCRIPT_EXTENSIONS;
    if (!supported) {
        return null;
    }
    if (propertyId == UProperty.GENERAL_CATEGORY) {
        return General_Category;
    }
    for (EnumProperty candidate : EnumProperty.values()) {
        if (candidate.propertyId == propertyId) {
            return candidate;
        }
    }
    return null;
}
Example #15
Source File: UnicodeDataTest.java From es6draft with MIT License | 5 votes |
@SuppressWarnings("deprecation") @Test public void testLimits() { // integer valued properties for (int p = UProperty.INT_START; p < UProperty.INT_LIMIT; ++p) { int min = UCharacter.getIntPropertyMinValue(p); int max = UCharacter.getIntPropertyMaxValue(p); assertTrue(String.format("min=%d", min), min >= 0); assertTrue(String.format("min=%d, max=%d", min, max), min <= max); assertTrue(String.format("max=%d", max), max < 512); // BINARY_MASK in UEncoding } }
Example #16
Source File: UPropertyAliases.java From trekarta with GNU General Public License v3.0 | 5 votes |
/**
 * Looks up an alias in the BytesTrie that starts at the given offset of
 * {@code bytesTries}.
 *
 * @param bytesTrieOffset offset of the trie inside {@code bytesTries}
 * @param alias           the name to look up
 * @return the trie's value when {@code containsName} accepts the alias,
 *         otherwise {@code UProperty.UNDEFINED}
 */
private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
    final BytesTrie trie = new BytesTrie(bytesTries, bytesTrieOffset);
    return containsName(trie, alias) ? trie.getValue() : UProperty.UNDEFINED;
}
Example #17
Source File: KhmerBreakEngine.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Reports whether this engine handles the given character for the given
 * break type.
 *
 * @param c         the code point
 * @param breakType a {@code BreakIterator} KIND_* constant
 * @return {@code true} only for word or line breaking of a character whose
 *         script is {@code UScript.KHMER}
 */
public boolean handles(int c, int breakType) {
    if (breakType != BreakIterator.KIND_WORD && breakType != BreakIterator.KIND_LINE) {
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.KHMER;
}
Example #18
Source File: CharacterPropertiesImpl.java From trekarta with GNU General Public License v3.0 | 5 votes |
private static UnicodeSet getIntPropInclusions(int prop) { assert(UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT); int inclIndex = UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START; if (inclusions[inclIndex] != null) { return inclusions[inclIndex]; } int src = UCharacterProperty.INSTANCE.getSource(prop); UnicodeSet incl = getInclusionsForSource(src); UnicodeSet intPropIncl = new UnicodeSet(0, 0); int numRanges = incl.getRangeCount(); int prevValue = 0; for (int i = 0; i < numRanges; ++i) { int rangeEnd = incl.getRangeEnd(i); for (int c = incl.getRangeStart(i); c <= rangeEnd; ++c) { // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. int value = UCharacter.getIntPropertyValue(c, prop); if (value != prevValue) { intPropIncl.add(c); prevValue = value; } } } // Compact for caching. return inclusions[inclIndex] = intPropIncl.compact(); }
Example #19
Source File: CharacterPropertiesImpl.java From trekarta with GNU General Public License v3.0 | 5 votes |
/**
 * Returns a mutable UnicodeSet -- do not modify!
 * <p>
 * Integer-valued properties are delegated to {@code getIntPropInclusions};
 * every other property uses the inclusions of its source.
 *
 * @param prop the {@code UProperty} selector
 * @return the inclusion set for the property
 */
public static synchronized UnicodeSet getInclusionsForProperty(int prop) {
    final boolean isIntProperty = UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT;
    if (isIntProperty) {
        return getIntPropInclusions(prop);
    }
    return getInclusionsForSource(UCharacterProperty.INSTANCE.getSource(prop));
}
Example #20
Source File: BurmeseBreakEngine.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Reports whether this engine handles the given character for the given
 * break type.
 *
 * @param c         the code point
 * @param breakType a {@code BreakIterator} KIND_* constant
 * @return {@code true} only for word or line breaking of a character whose
 *         script is {@code UScript.MYANMAR}
 */
@Override
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    // Short-circuit: the script is only looked up for a supported break type.
    return wordOrLine && UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.MYANMAR;
}
Example #21
Source File: UCharacterProperty.java From fitnotifications with Apache License 2.0 | 5 votes |
public int getIntPropertyMaxValue(int which) { if(which<UProperty.INT_START) { if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) { return 1; // maximum TRUE for all binary properties } } else if(which<UProperty.INT_LIMIT) { return intProps[which-UProperty.INT_START].getMaxValue(which); } return -1; // undefined }
Example #22
Source File: ThaiBreakEngine.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Reports whether this engine handles the given character for the given
 * break type.
 *
 * @param c         the code point
 * @param breakType a {@code BreakIterator} KIND_* constant
 * @return {@code true} only for word or line breaking of a character whose
 *         script is {@code UScript.THAI}
 */
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    if (!wordOrLine) {
        return false;
    }
    final int scriptOfChar = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    return scriptOfChar == UScript.THAI;
}
Example #23
Source File: SpoofChecker.java From fitnotifications with Apache License 2.0 | 5 votes |
private void addScriptChars(ULocale locale, UnicodeSet allowedChars) { int scripts[] = UScript.getCode(locale); if (scripts != null) { UnicodeSet tmpSet = new UnicodeSet(); for (int i = 0; i < scripts.length; i++) { tmpSet.applyIntPropertyValue(UProperty.SCRIPT, scripts[i]); allowedChars.addAll(tmpSet); } } // else it's an unknown script. // Maybe they asked for the script of "zxx", which refers to no linguistic content. // Maybe they asked for the script of a newer locale that we don't know in the older version of ICU. }
Example #24
Source File: SpoofChecker.java From fitnotifications with Apache License 2.0 | 5 votes |
/** * Limit characters that are acceptable in identifiers being checked to those normally used with the languages * associated with the specified locales. Any previously specified list of locales is replaced by the new * settings. * * A set of languages is determined from the locale(s), and from those a set of acceptable Unicode scripts is * determined. Characters from this set of scripts, along with characters from the "common" and "inherited" * Unicode Script categories will be permitted. * * Supplying an empty string removes all restrictions; characters from any script will be allowed. * * The {@link #CHAR_LIMIT} test is automatically enabled for this SpoofChecker when calling this function with a * non-empty list of locales. * * The Unicode Set of characters that will be allowed is accessible via the {@link #getAllowedChars} function. * setAllowedLocales() will <i>replace</i> any previously applied set of allowed characters. * * Adjustments, such as additions or deletions of certain classes of characters, can be made to the result of * {@link #setAllowedChars} by fetching the resulting set with {@link #getAllowedChars}, manipulating it with * the Unicode Set API, then resetting the spoof detectors limits with {@link #setAllowedChars}. * * @param locales * A Set of ULocales, from which the language and associated script are extracted. If the locales Set * is null, no restrictions will be placed on the allowed characters. * * @return self * @stable ICU 4.6 */ public Builder setAllowedLocales(Set<ULocale> locales) { fAllowedCharsSet.clear(); for (ULocale locale : locales) { // Add the script chars for this locale to the accumulating set // of allowed chars. 
addScriptChars(locale, fAllowedCharsSet); } // If our caller provided an empty list of locales, we disable the // allowed characters checking fAllowedLocales.clear(); if (locales.size() == 0) { fAllowedCharsSet.add(0, 0x10ffff); fChecks &= ~CHAR_LIMIT; return this; } // Add all common and inherited characters to the set of allowed // chars. UnicodeSet tempSet = new UnicodeSet(); tempSet.applyIntPropertyValue(UProperty.SCRIPT, UScript.COMMON); fAllowedCharsSet.addAll(tempSet); tempSet.applyIntPropertyValue(UProperty.SCRIPT, UScript.INHERITED); fAllowedCharsSet.addAll(tempSet); // Store the updated spoof checker state. fAllowedLocales.clear(); fAllowedLocales.addAll(locales); fChecks |= CHAR_LIMIT; return this; }
Example #25
Source File: UPropertyAliases.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Resolves an alias through the BytesTrie located at {@code bytesTrieOffset}
 * in {@code bytesTries}.
 *
 * @param bytesTrieOffset where the trie starts inside {@code bytesTries}
 * @param alias           the name to resolve
 * @return {@code UProperty.UNDEFINED} if {@code containsName} rejects the
 *         alias, otherwise the value the trie holds after the lookup
 */
private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
    final BytesTrie lookup = new BytesTrie(bytesTries, bytesTrieOffset);
    if (!containsName(lookup, alias)) {
        return UProperty.UNDEFINED;
    }
    return lookup.getValue();
}
Example #26
Source File: UPropertyAliases.java From fitnotifications with Apache License 2.0 | 5 votes |
/** * Returns a value enum given a property enum and one of its value names. Does not throw. * @return value enum, or UProperty.UNDEFINED if not defined for that property */ public int getPropertyValueEnumNoThrow(int property, CharSequence alias) { int valueMapIndex=findProperty(property); if(valueMapIndex==0) { return UProperty.UNDEFINED; } valueMapIndex=valueMaps[valueMapIndex+1]; if(valueMapIndex==0) { return UProperty.UNDEFINED; } // valueMapIndex is the start of the property's valueMap, // where the first word is the BytesTrie offset. return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); }
Example #27
Source File: UCharacterProperty.java From fitnotifications with Apache License 2.0 | 5 votes |
public boolean hasBinaryProperty(int c, int which) { if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) { // not a known binary property return false; } else { return binProps[which].contains(c); } }
Example #28
Source File: LaoBreakEngine.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Reports whether this engine handles the given character for the given
 * break type.
 *
 * @param c         the code point
 * @param breakType a {@code BreakIterator} KIND_* constant
 * @return {@code true} only for word or line breaking of a character whose
 *         script is {@code UScript.LAO}
 */
public boolean handles(int c, int breakType) {
    boolean handled = false;
    if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
        handled = UScript.LAO == UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    }
    return handled;
}
Example #29
Source File: UnhandledBreakEngine.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Records a character as handled for the given break type: if the per-type
 * set does not contain {@code c} yet, the script of {@code c} is applied to
 * that set via {@code applyIntPropertyValue}.
 * <p>
 * Calls with a break type outside {@code [0, fHandled.length)} or with
 * {@code c == DONE32} are ignored.
 *
 * @param c         the code point
 * @param breakType index into {@code fHandled}
 */
public synchronized void handleChar(int c, int breakType) {
    final boolean validBreakType = breakType >= 0 && breakType < fHandled.length;
    if (!validBreakType || c == DONE32) {
        return;
    }
    if (fHandled[breakType].contains(c)) {
        return;
    }
    final int scriptOfChar = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    fHandled[breakType].applyIntPropertyValue(UProperty.SCRIPT, scriptOfChar);
}
Example #30
Source File: UCharacterProperty.java From fitnotifications with Apache License 2.0 | 5 votes |
public int getIntPropertyValue(int c, int which) { if(which<UProperty.INT_START) { if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) { return binProps[which].contains(c) ? 1 : 0; } } else if(which<UProperty.INT_LIMIT) { return intProps[which-UProperty.INT_START].getValue(c); } else if (which == UProperty.GENERAL_CATEGORY_MASK) { return getMask(getType(c)); } return 0; // undefined }