android.icu.lang.UCharacter#getType

Source File: AlphabeticIndex.java From j2objc with Apache License 2.0

6 votes

/**
 * Collects the script-first-primary boundary strings whose sample character is a
 * letter or an unassigned code point. Only exposed for testing.
 *
 * @return list of first characters in each script
 * @throws UnsupportedOperationException if no contraction starting with U+FDD1 is found
 * @deprecated This API is ICU internal, only for testing.
 * @hide original deprecated declaration
 * @hide draft / provisional / internal are hidden on Android
 */
@Deprecated
public List<String> getFirstCharactersInScripts() {
    // The script-first-primary contractions are defined in the root collator
    // and every one of them starts with U+FDD1.
    UnicodeSet scriptBoundaries = new UnicodeSet();
    collatorPrimaryOnly.internalAddContractions(0xFDD1, scriptBoundaries);
    if (scriptBoundaries.isEmpty()) {
        throw new UnsupportedOperationException(
                "AlphabeticIndex requires script-first-primary contractions");
    }

    final int acceptedCategories = GC_L_MASK | GC_CN_MASK;
    List<String> firstCharacters = new ArrayList<String>(200);
    for (String boundary : scriptBoundaries) {
        // The sample character follows the U+FDD1 prefix, i.e. it sits at index 1.
        int sampleCategory = UCharacter.getType(boundary.codePointAt(1));
        // Keep only the boundaries for "real scripts" (sample character is a Letter)
        // and the one for unassigned implicit weights (Cn); boundaries for the
        // special reordering groups are dropped.
        if (((1 << sampleCategory) & acceptedCategories) != 0) {
            firstCharacters.add(boundary);
        }
    }
    return firstCharacters;
}

Source File: SpoofChecker.java From j2objc with Apache License 2.0

6 votes

/**
 * Computes the set of numerics for a string, according to UTS 39 section 5.3.
 *
 * @param input  text whose decimal digits are inspected, code point by code point
 * @param result set that is cleared and then filled with one representative (the
 *               zero digit) for each kind of decimal digit found in {@code input}
 */
private void getNumerics(String input, UnicodeSet result) {
    result.clear();

    final int length = input.length();
    int offset = 0;
    while (offset < length) {
        final int cp = Character.codePointAt(input, offset);
        offset += Character.charCount(cp);

        if (UCharacter.getType(cp) != UCharacterCategory.DECIMAL_DIGIT_NUMBER) {
            continue;
        }
        // Use the zero character of the digit's range as the representative for
        // comparison. Unicode guarantees it is (code point - numeric value).
        final int zeroDigit = cp - UCharacter.getNumericValue(cp);
        result.add(zeroDigit);
    }
}

Source File: UCharacterName.java From j2objc with Apache License 2.0

6 votes

/**
 * Gets the extended type of a character: the general category from
 * {@code UCharacter.getType}, refined for noncharacters and for surrogates.
 *
 * @param ch character to be tested
 * @return {@code NON_CHARACTER_} for a noncharacter, {@code LEAD_SURROGATE_} or
 *         {@code TRAIL_SURROGATE_} for a surrogate, otherwise the general category
 */
private static int getType(int ch)
{
    if (UCharacterUtility.isNonCharacter(ch)) {
        // not a character: report the dedicated pseudo-category
        return NON_CHARACTER_;
    }

    final int category = UCharacter.getType(ch);
    if (category != UCharacterCategory.SURROGATE) {
        return category;
    }

    // Surrogates are split by comparing against the last lead surrogate value.
    return (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) ? LEAD_SURROGATE_ : TRAIL_SURROGATE_;
}

Source File: NormalizationMonkeyTest.java From j2objc with Apache License 2.0

6 votes

/**
 * Builds a random test string that contains no unassigned code points.
 *
 * The random generator is created lazily on first use. Code points are drawn
 * below {@code maxCodePoint}; a draw whose general category is UNASSIGNED is
 * rejected and redrawn.
 *
 * @return the generated string
 */
String getTestSource() {
    if (random == null) {
        random = createRandom(); // use test framework's random seed
    }
    // Accumulate in a builder; repeated String concatenation would copy the
    // whole text again on every iteration.
    StringBuilder source = new StringBuilder();
    // The loop bound is drawn anew on every pass. This is kept on purpose so the
    // order of random draws (and thus the data produced for a given seed) is unchanged.
    for (int i = 0; i < (random.nextInt(maxCharCount) + 1); i++) {
        int codepoint = random.nextInt(maxCodePoint);
        // Eliminate unassigned characters
        while (UCharacter.getType(codepoint) == UCharacterCategory.UNASSIGNED) {
            codepoint = random.nextInt(maxCodePoint);
        }
        source.append(UTF16.valueOf(codepoint));
    }
    return source.toString();
}

Source File: RoundTripTest.java From j2objc with Apache License 2.0

6 votes

/**
 * Tests whether a string is of the form aB, i.e. a lowercase (or titlecase)
 * letter is followed, somewhere later in the string, by an uppercase or
 * titlecase letter.
 *
 * @param a the string to examine, scanned code point by code point
 * @return true as soon as an upper/titlecase letter is met after a lower/titlecase one
 */
public static boolean isCamel(String a) {
    boolean seenLower = false;
    int index = 0;
    while (index < a.length()) {
        final int codePoint = UTF16.charAt(a, index);
        index += UTF16.getCharCount(codePoint);

        final int category = UCharacter.getType(codePoint);
        final boolean isUpper = category == Character.UPPERCASE_LETTER;
        final boolean isTitle = category == Character.TITLECASE_LETTER;
        final boolean isLower = category == Character.LOWERCASE_LETTER;

        if ((isUpper || isTitle) && seenLower) {
            return true;
        }
        // A titlecase letter also counts as "lower", since its second letter is lower.
        if (isTitle || isLower) {
            seenLower = true;
        }
    }
    return false;
}

Source File: UCharacterProperty.java From j2objc with Apache License 2.0

5 votes

/**
 * "Horizontal space" test: TAB or SPACE in the range up to U+009F, a space
 * separator (Zs) above it.
 */
@Override
boolean contains(int c) {
    if (c > 0x9f) {
        /* Zs */
        return UCharacter.getType(c) == UCharacter.SPACE_SEPARATOR;
    }
    /* SPACE or TAB */
    return c == 0x20 || c == 9;
}

Source File: UCharacterProperty.java From j2objc with Apache License 2.0

5 votes

/**
 * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
 *
 * The only cntrl character in graph+blank is TAB (in blank), so
 * (blank-TAB)=Zs is tested here instead of calling u_isblank().
 */
@Override
boolean contains(int c) {
    if (UCharacter.getType(c) == UCharacter.SPACE_SEPARATOR) {
        return true;
    }
    return isgraphPOSIX(c);
}

Source File: UCharacterProperty.java From j2objc with Apache License 2.0

5 votes

/**
 * Hex-digit test: accepts ASCII and Fullwidth ASCII a-f/A-F, and otherwise any
 * code point whose general category is decimal digit number.
 */
@Override
boolean contains(int c) {
    /* ASCII A-F and a-f */
    final boolean asciiHexLetter =
            (c >= 0x41 && c <= 0x46) || (c >= 0x61 && c <= 0x66);
    /* Fullwidth ASCII A-F and a-f */
    final boolean fullwidthHexLetter =
            (c >= 0xff21 && c <= 0xff26) || (c >= 0xff41 && c <= 0xff46);

    if (asciiHexLetter || fullwidthHexLetter) {
        return true;
    }
    return UCharacter.getType(c) == UCharacter.DECIMAL_DIGIT_NUMBER;
}

Source File: TestCanonicalIterator.java From j2objc with Apache License 2.0

5 votes

/**
 * Runs characterTest over code points from 0 up to (but not including) 0x10FFFF,
 * once for the bare character and once with U+0345 appended.
 */
@Test
public void TestExhaustive() {
    CanonicalIterator it = new CanonicalIterator("");
    int tested = 0;

    // Note: the bound is exclusive, so 0x10FFFF itself is never visited.
    for (int cp = 0; cp < 0x10FFFF; ++cp) {

        // skip characters we know don't have decomps
        switch (UCharacter.getType(cp)) {
        case Character.UNASSIGNED:
        case Character.PRIVATE_USE:
        case Character.SURROGATE:
            continue;
        default:
            break;
        }

        ++tested;
        if (tested % 5000 == 0) {
            logln("Testing " + Utility.hex(cp,0));
        }

        final String single = UTF16.valueOf(cp);
        characterTest(single, cp, it);
        characterTest(single + "\u0345", cp, it);
    }
}

Source File: UnicodeSetTest.java From j2objc with Apache License 2.0

5 votes

/**
 * Checks the [:Lu:] and [:L:] property sets against UCharacter for the code
 * points below 0x200 (no need to go over the whole range).
 */
@Test
public void TestCategories() {
    final int limit = 0x200;
    int failures = 0;

    UnicodeSet upperFirst = new UnicodeSet("[:Lu:]");
    expectContainment(upperFirst, "ABC", "abc");

    // Make sure generation of L doesn't pollute cached Lu set:
    // first generate L, then Lu.
    UnicodeSet letters = new UnicodeSet("[:L:]");
    for (int cp = 0; cp < limit; ++cp) {
        final char ch = (char) cp;
        final boolean inSet = letters.contains(ch);
        if (UCharacter.isLetter(cp) == inSet) {
            continue;
        }
        errln("FAIL: L contains " + ch + " = " + 
                inSet);
        if (++failures == 10) break;
    }

    UnicodeSet upper = new UnicodeSet("[:Lu:]");
    for (int cp = 0; cp < limit; ++cp) {
        final char ch = (char) cp;
        final boolean inSet = upper.contains(ch);
        final boolean expected =
                UCharacter.getType(cp) == ECharacterCategory.UPPERCASE_LETTER;
        if (expected == inSet) {
            continue;
        }
        errln("FAIL: Lu contains " + ch + " = " + 
                inSet);
        // The failure counter is shared with the first loop.
        if (++failures == 20) break;
    }
}

Source File: UCharacterTest.java From j2objc with Apache License 2.0

5 votes

/**
 * Verifies that UCharacter.getType returns 0 for values just above the
 * maximum code point.
 */
@Test
public void TestGetProperty(){
    final int[] aboveMax = {UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2};
    for (int value : aboveMax) {
        if (UCharacter.getType(value) == 0) {
            continue;
        }
        errln("UCharacter.getType for testing UCharacter.getProperty "
                + "did not return 0 for passed value of " + value +
                " but got " + UCharacter.getType(value));
    }
}

Source File: BreakTransliterator.java From j2objc with Apache License 2.0

4 votes

/**
 * Inserts the {@code insertion} string at every break-iterator boundary that has a
 * letter or mark (per LETTER_OR_MARK_MASK) on both sides.
 *
 * Boundaries are first collected into the {@code boundaries} array, then the
 * insertions are applied from the last boundary to the first, and finally the
 * positions in {@code pos} are shifted by the total inserted length.
 *
 * @param text        the text to modify in place
 * @param pos         the range to process; contextLimit, limit and start are updated
 * @param incremental if true, start is set just past the last insertion point;
 *                    otherwise it is set to the new limit
 */
@Override
protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
    // Start with an empty boundary stack; the backing array is reused between calls.
    boundaryCount = 0;
    int boundary = 0;
    getBreakIterator(); // Lazy-create it if necessary
    bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
    // TODO: fix clumsy workaround used below.
    /*
    char[] tempBuffer = new char[text.length()];
    text.getChars(0, text.length(), tempBuffer, 0);
    bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
    */
    // end debugging

    // To make things much easier, we will stack the boundaries, and then insert at the end.
    // generally, we won't need too many, since we will be filtered.

    for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
        // A boundary at offset 0 has no preceding character to inspect.
        if (boundary == 0) continue;
        // HACK: Check to see that preceding item was a letter

        int cp = UTF16.charAt(text, boundary-1);
        int type = UCharacter.getType(cp);
        //System.out.println(Integer.toString(cp,16) + " (before): " + type);
        if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;

        // The character right after the boundary must pass the same mask test.
        cp = UTF16.charAt(text, boundary);
        type = UCharacter.getType(cp);
        //System.out.println(Integer.toString(cp,16) + " (after): " + type);
        if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;

        // Grow the stack by doubling when it is full.
        if (boundaryCount >= boundaries.length) {       // realloc if necessary
            int[] temp = new int[boundaries.length * 2];
            System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
            boundaries = temp;
        }

        boundaries[boundaryCount++] = boundary;
        //System.out.println(boundary);
    }

    // delta: total number of chars inserted; lastBoundary: highest accepted boundary
    // (as an offset in the text before any insertion).
    int delta = 0;
    int lastBoundary = 0;

    if (boundaryCount != 0) { // if we found something, adjust
        delta = boundaryCount * insertion.length();
        lastBoundary = boundaries[boundaryCount-1];

        // we do this from the end backwards, so that we don't have to keep updating.

        while (boundaryCount > 0) {
            boundary = boundaries[--boundaryCount];
            // Zero-length replace range: a pure insertion at the boundary.
            text.replace(boundary, boundary, insertion);
        }
    }

    // Now fix up the return values
    pos.contextLimit += delta;
    pos.limit += delta;
    pos.start = incremental ? lastBoundary + delta : pos.limit;
}

Source File: UnicodeSet.java From j2objc with Apache License 2.0

4 votes

/**
 * Tests whether the general category of {@code ch} is selected by {@code mask},
 * where each category is represented by the bit (1 << category).
 */
@Override
public boolean contains(int ch) {
    final int categoryBit = 1 << UCharacter.getType(ch);
    return (categoryBit & mask) != 0;
}

Source File: UTS46.java From j2objc with Apache License 2.0

4 votes

/**
 * Returns a mask with the single bit set that corresponds to the general
 * category of {@code c}, i.e. 1 shifted left by UCharacter.getType(c).
 */
private static int U_GET_GC_MASK(int c) {
    final int generalCategory = UCharacter.getType(c);
    return 1 << generalCategory;
}

Source File: BasicTest.java From j2objc with Apache License 2.0

4 votes

/**
 * Counts the code points whose case folding may turn an FCD string into a
 * non-FCD one, reporting each of them through errln.
 *
 * Every assigned code point up to U+10FFFF is examined, except the Hangul and
 * Han ranges that are skipped explicitly. For each one, the leading/trailing
 * combining classes of its decomposition are compared with those of the
 * decomposition of its case folding.
 *
 * @param foldingOptions folding options; {@code foldingOptions==0} is passed to
 *                       UCharacter.foldCase as its boolean argument
 * @return the number of offending code points
 */
int countFoldFCDExceptions(int foldingOptions) {
    String s, d;
    int c;
    int count;
    int/*unsigned*/ cc, trailCC, foldCC, foldTrailCC;
    Normalizer.QuickCheckResult qcResult;
    int category;
    boolean isNFD;


    logln("Test if case folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");

    count=0;
    for(c=0; c<=0x10ffff; ++c) {
        category=UCharacter.getType(c);
        if(category==UCharacterCategory.UNASSIGNED) {
            continue; // skip unassigned code points
        }
        // Range skips: jump to the last code point of the range; the loop's ++c
        // then moves past it.
        if(c==0xac00) {
            c=0xd7a3; // skip Hangul - no case folding there
            continue;
        }
        // skip Han blocks - no case folding there either
        if(c==0x3400) {
            c=0x4db5;
            continue;
        }
        if(c==0x4e00) {
            c=0x9fa5;
            continue;
        }
        if(c==0x20000) {
            c=0x2a6d6;
            continue;
        }

        s= UTF16.valueOf(c);

        // get leading and trailing cc for c
        d= Normalizer.decompose(s,false);
        // Compare contents: reference equality would depend on whether decompose
        // happens to return the same String instance.
        isNFD= s.equals(d);
        cc=UCharacter.getCombiningClass(UTF16.charAt(d,0));
        trailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));

        // get leading and trailing cc for the case-folding of c
        // Strings are immutable, so the folded text is the return value; it must
        // replace s for the checks below to look at the folding at all.
        s= UCharacter.foldCase(s,(foldingOptions==0));
        if(s.length()==0) {
            // bad: character maps to empty string: adjacent characters may then
            // need reordering. Reported here, before any charAt on empty text.
            ++count;
            errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
            continue;
        }
        d = Normalizer.decompose(s, false);
        foldCC=UCharacter.getCombiningClass(UTF16.charAt(d,0));
        foldTrailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));

        qcResult=Normalizer.quickCheck(s, Normalizer.FCD,0);


        // bad:
        // - folding has different leading/trailing cc's, and they don't become just 0
        // - folding itself is not FCD
        if( qcResult!=Normalizer.YES ||
            (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0)
        ) {
            ++count;
            errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
            //errln("  cc %02x trailCC %02x    foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x   quickCheck(folded)=%d", cc, trailCC, UTF16.charAt(d,0), foldCC, UTF16.charAt(d,d.length()-1), foldTrailCC, qcResult);
            continue;
        }

        // also bad:
        // if a code point is in NFD but its case folding is not, then
        // unorm_compare will also fail
        if(isNFD && Normalizer.YES!=Normalizer.quickCheck(s, Normalizer.NFD,0)) {
            ++count;
            errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
        }
    }

    logln("There are "+hex(count)+" code points for which case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")" );
    return count;
}

Java Code Examples for android.icu.lang.UCharacter#getType()