Java Code Examples for android.icu.lang.UCharacter#getType()
The following examples show how to use
android.icu.lang.UCharacter#getType().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AlphabeticIndex.java From j2objc with Apache License 2.0 | 6 votes |
/** * Return a list of the first character in each script. Only exposed for testing. * * @return list of first characters in each script * @deprecated This API is ICU internal, only for testing. * @hide original deprecated declaration * @hide draft / provisional / internal are hidden on Android */ @Deprecated public List<String> getFirstCharactersInScripts() { List<String> dest = new ArrayList<String>(200); // Fetch the script-first-primary contractions which are defined in the root collator. // They all start with U+FDD1. UnicodeSet set = new UnicodeSet(); collatorPrimaryOnly.internalAddContractions(0xFDD1, set); if (set.isEmpty()) { throw new UnsupportedOperationException( "AlphabeticIndex requires script-first-primary contractions"); } for (String boundary : set) { int gcMask = 1 << UCharacter.getType(boundary.codePointAt(1)); if ((gcMask & (GC_L_MASK | GC_CN_MASK)) == 0) { // Ignore boundaries for the special reordering groups. // Take only those for "real scripts" (where the sample character is a Letter, // and the one for unassigned implicit weights (Cn). continue; } dest.add(boundary); } return dest; }
Example 2
Source File: SpoofChecker.java From j2objc with Apache License 2.0 | 6 votes |
/** * Computes the set of numerics for a string, according to UTS 39 section 5.3. */ private void getNumerics(String input, UnicodeSet result) { result.clear(); for (int utf16Offset = 0; utf16Offset < input.length();) { int codePoint = Character.codePointAt(input, utf16Offset); utf16Offset += Character.charCount(codePoint); // Store a representative character for each kind of decimal digit if (UCharacter.getType(codePoint) == UCharacterCategory.DECIMAL_DIGIT_NUMBER) { // Store the zero character as a representative for comparison. // Unicode guarantees it is codePoint - value result.add(codePoint - UCharacter.getNumericValue(codePoint)); } } }
Example 3
Source File: UCharacterName.java From j2objc with Apache License 2.0 | 6 votes |
/** * Gets the character extended type * @param ch character to be tested * @return extended type it is associated with */ private static int getType(int ch) { if (UCharacterUtility.isNonCharacter(ch)) { // not a character we return a invalid category count return NON_CHARACTER_; } int result = UCharacter.getType(ch); if (result == UCharacterCategory.SURROGATE) { if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { result = LEAD_SURROGATE_; } else { result = TRAIL_SURROGATE_; } } return result; }
Example 4
Source File: NormalizationMonkeyTest.java From j2objc with Apache License 2.0 | 6 votes |
String getTestSource() { if (random == null) { random = createRandom(); // use test framework's random seed } String source = ""; int i = 0; while (i < (random.nextInt(maxCharCount) + 1)) { int codepoint = random.nextInt(maxCodePoint); //Elimate unassigned characters while (UCharacter.getType(codepoint) == UCharacterCategory.UNASSIGNED) { codepoint = random.nextInt(maxCodePoint); } source = source + UTF16.valueOf(codepoint); i++; } return source; }
Example 5
Source File: RoundTripTest.java From j2objc with Apache License 2.0 | 6 votes |
public static boolean isCamel(String a) { //System.out.println("CamelTest"); // see if string is of the form aB; e.g. lower, then upper or title int cp; boolean haveLower = false; for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(a, i); int t = UCharacter.getType(cp); //System.out.println("\t" + t + " " + Integer.toString(cp,16) + " " + UCharacter.getName(cp)); switch (t) { case Character.UPPERCASE_LETTER: if (haveLower) return true; break; case Character.TITLECASE_LETTER: if (haveLower) return true; // drop through, since second letter is lower. case Character.LOWERCASE_LETTER: haveLower = true; break; } } //System.out.println("FALSE"); return false; }
Example 6
Source File: UCharacterProperty.java From j2objc with Apache License 2.0 | 5 votes |
@Override boolean contains(int c) { // "horizontal space" if(c<=0x9f) { return c==9 || c==0x20; /* TAB or SPACE */ } else { /* Zs */ return UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR; } }
Example 7
Source File: UCharacterProperty.java From j2objc with Apache License 2.0 | 5 votes |
/**
 * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
 *
 * The only cntrl character in graph+blank is TAB (in blank), so
 * (blank-TAB)=Zs is tested directly instead of calling u_isblank().
 */
@Override
boolean contains(int c) {
    if (UCharacter.getType(c) == UCharacter.SPACE_SEPARATOR) {
        return true;
    }
    return isgraphPOSIX(c);
}
Example 8
Source File: UCharacterProperty.java From j2objc with Apache License 2.0 | 5 votes |
/**
 * Hex-digit test: ASCII and Fullwidth ASCII a-f / A-F, plus every code point
 * whose general category is decimal digit number.
 */
@Override
boolean contains(int c) {
    // ASCII A-F (U+0041..U+0046) and a-f (U+0061..U+0066)
    final boolean asciiHexLetter =
            (c >= 0x41 && c <= 0x46) || (c >= 0x61 && c <= 0x66);
    // Fullwidth A-F (U+FF21..U+FF26) and a-f (U+FF41..U+FF46)
    final boolean fullwidthHexLetter =
            (c >= 0xff21 && c <= 0xff26) || (c >= 0xff41 && c <= 0xff46);

    if (asciiHexLetter || fullwidthHexLetter) {
        return true;
    }
    return UCharacter.getType(c) == UCharacter.DECIMAL_DIGIT_NUMBER;
}
Example 9
Source File: TestCanonicalIterator.java From j2objc with Apache License 2.0 | 5 votes |
@Test public void TestExhaustive() { int counter = 0; CanonicalIterator it = new CanonicalIterator(""); /* CanonicalIterator slowIt = new CanonicalIterator(""); slowIt.SKIP_ZEROS = false; */ //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name"); //Set itSet = new TreeSet(); //Set slowItSet = new TreeSet(); for (int i = 0; i < 0x10FFFF; ++i) { // skip characters we know don't have decomps int type = UCharacter.getType(i); if (type == Character.UNASSIGNED || type == Character.PRIVATE_USE || type == Character.SURROGATE) continue; if ((++counter % 5000) == 0) logln("Testing " + Utility.hex(i,0)); String s = UTF16.valueOf(i); characterTest(s, i, it); characterTest(s + "\u0345", i, it); } }
Example 10
Source File: UnicodeSetTest.java From j2objc with Apache License 2.0 | 5 votes |
@Test public void TestCategories() { int failures = 0; UnicodeSet set = new UnicodeSet("[:Lu:]"); expectContainment(set, "ABC", "abc"); // Make sure generation of L doesn't pollute cached Lu set // First generate L, then Lu // not used int TOP = 0x200; // Don't need to go over the whole range: set = new UnicodeSet("[:L:]"); for (int i=0; i<0x200; ++i) { boolean l = UCharacter.isLetter(i); if (l != set.contains((char)i)) { errln("FAIL: L contains " + (char)i + " = " + set.contains((char)i)); if (++failures == 10) break; } } set = new UnicodeSet("[:Lu:]"); for (int i=0; i<0x200; ++i) { boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER); if (lu != set.contains((char)i)) { errln("FAIL: Lu contains " + (char)i + " = " + set.contains((char)i)); if (++failures == 20) break; } } }
Example 11
Source File: UCharacterTest.java From j2objc with Apache License 2.0 | 5 votes |
/**
 * Verifies that UCharacter.getType returns 0 for values just above the
 * maximum code point.
 */
@Test
public void TestGetProperty() {
    final int[] cases = {UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2};
    for (int outOfRange : cases) {
        if (UCharacter.getType(outOfRange) != 0) {
            errln("UCharacter.getType for testing UCharacter.getProperty "
                    + "did not return 0 for passed value of " + outOfRange
                    + " but got " + UCharacter.getType(outOfRange));
        }
    }
}
Example 12
Source File: BreakTransliterator.java From j2objc with Apache License 2.0 | 4 votes |
/**
 * Inserts {@code insertion} at every break-iterator boundary inside
 * {@code [pos.start, pos.limit)} that has a letter or mark on both sides,
 * then adjusts {@code pos} for the inserted text.
 *
 * @param text        text to modify in place
 * @param pos         range to process; contextLimit, limit and start are updated
 * @param incremental if true, {@code pos.start} ends just after the last insertion
 *                    point (shifted by the inserted length); otherwise it is set to
 *                    the new {@code pos.limit}
 */
@Override
protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
    boundaryCount = 0;
    getBreakIterator(); // Lazy-create it if necessary
    bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));

    // Phase 1: stack up the qualifying boundaries; the insertions happen afterwards.
    // Generally there won't be too many, since we will be filtered.
    for (int candidate = bi.first();
            candidate != BreakIterator.DONE && candidate < pos.limit;
            candidate = bi.next()) {
        if (candidate == 0) {
            continue;
        }

        // HACK: the character before the boundary must be a letter or mark ...
        int categoryBit = 1 << UCharacter.getType(UTF16.charAt(text, candidate - 1));
        if ((categoryBit & LETTER_OR_MARK_MASK) != 0) {
            // ... and so must the character after it.
            categoryBit = 1 << UCharacter.getType(UTF16.charAt(text, candidate));
            if ((categoryBit & LETTER_OR_MARK_MASK) != 0) {
                if (boundaryCount >= boundaries.length) {
                    // Out of room: double the stack and carry the old entries over.
                    int[] grown = new int[boundaries.length * 2];
                    System.arraycopy(boundaries, 0, grown, 0, boundaries.length);
                    boundaries = grown;
                }
                boundaries[boundaryCount] = candidate;
                boundaryCount++;
            }
        }
    }

    // Phase 2: perform the insertions.
    int delta = 0;
    int lastBoundary = 0;
    if (boundaryCount != 0) {
        delta = boundaryCount * insertion.length();
        lastBoundary = boundaries[boundaryCount - 1];

        // Work from the end backwards so earlier offsets stay valid.
        while (boundaryCount > 0) {
            boundaryCount--;
            int insertAt = boundaries[boundaryCount];
            text.replace(insertAt, insertAt, insertion);
        }
    }

    // Now fix up the return values
    pos.contextLimit += delta;
    pos.limit += delta;
    if (incremental) {
        pos.start = lastBoundary + delta;
    } else {
        pos.start = pos.limit;
    }
}
Example 13
Source File: UnicodeSet.java From j2objc with Apache License 2.0 | 4 votes |
/**
 * Tests whether the bit for the general category of {@code ch} is set in {@code mask}.
 *
 * @param ch code point to test
 * @return {@code true} if the category bit of {@code ch} is present in {@code mask}
 */
@Override
public boolean contains(int ch) {
    final int categoryBit = 1 << UCharacter.getType(ch);
    return (categoryBit & mask) != 0;
}
Example 14
Source File: UTS46.java From j2objc with Apache License 2.0 | 4 votes |
/**
 * Returns a single-bit mask for the general category of a code point.
 *
 * @param c code point
 * @return {@code 1} shifted left by {@code UCharacter.getType(c)}
 */
private static int U_GET_GC_MASK(int c) {
    final int category = UCharacter.getType(c);
    return 1 << category;
}
Example 15
Source File: BasicTest.java From j2objc with Apache License 2.0 | 4 votes |
int countFoldFCDExceptions(int foldingOptions) { String s, d; int c; int count; int/*unsigned*/ cc, trailCC, foldCC, foldTrailCC; Normalizer.QuickCheckResult qcResult; int category; boolean isNFD; logln("Test if case folding may un-FCD a string (folding options 0x)"+hex(foldingOptions)); count=0; for(c=0; c<=0x10ffff; ++c) { category=UCharacter.getType(c); if(category==UCharacterCategory.UNASSIGNED) { continue; // skip unassigned code points } if(c==0xac00) { c=0xd7a3; // skip Hangul - no case folding there continue; } // skip Han blocks - no case folding there either if(c==0x3400) { c=0x4db5; continue; } if(c==0x4e00) { c=0x9fa5; continue; } if(c==0x20000) { c=0x2a6d6; continue; } s= UTF16.valueOf(c); // get leading and trailing cc for c d= Normalizer.decompose(s,false); isNFD= s==d; cc=UCharacter.getCombiningClass(UTF16.charAt(d,0)); trailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1)); // get leading and trailing cc for the case-folding of c UCharacter.foldCase(s,(foldingOptions==0)); d = Normalizer.decompose(s, false); foldCC=UCharacter.getCombiningClass(UTF16.charAt(d,0)); foldTrailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1)); qcResult=Normalizer.quickCheck(s, Normalizer.FCD,0); // bad: // - character maps to empty string: adjacent characters may then need reordering // - folding has different leading/trailing cc's, and they don't become just 0 // - folding itself is not FCD if( qcResult!=Normalizer.YES || s.length()==0 || (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0) ) { ++count; errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")"); //errln(" cc %02x trailCC %02x foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x quickCheck(folded)=%d", cc, trailCC, UTF16.charAt(d,0), foldCC, UTF16.charAt(d,d.length()-1), foldTrailCC, qcResult); continue; } // also bad: // if a code point is in NFD but its case folding is not, then // unorm_compare will also fail if(isNFD && 
Normalizer.YES!=Normalizer.quickCheck(s, Normalizer.NFD,0)) { ++count; errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")"); } } logln("There are "+hex(count)+" code points for which case-folding may un-FCD a string (folding options"+foldingOptions+"x)" ); return count; }