sun.text.normalizer.UCharacterIterator#getInstance

Source File: NFS4StringPrep.java From hottub with GNU General Public License v2.0

6 votes

/**
 * Prepares a UTF-8 encoded "mixed" string. When the decoded text contains
 * AT_SIGN, the part before the first delimiter is prepared with the
 * nfsmxp profile and the part after it with the nfsmxs profile; otherwise the
 * whole text is prepared with nfsmxp.
 *
 * @param src the UTF-8 bytes of the string to prepare
 * @return the prepared string, encoded as UTF-8
 * @throws ParseException if a special prefix is followed by a non-empty suffix,
 *                        or if one of the profiles rejects its input
 */
public static byte[] mixed_prepare(byte[] src)
                     throws IOException, ParseException, UnsupportedEncodingException{
    final String text = new String(src, "UTF-8");
    final int atPos = text.indexOf(AT_SIGN);
    final StringBuffer prepared = new StringBuffer();

    if(atPos < 0){
        // no delimiter present: the whole text goes through the prefix profile
        UCharacterIterator whole = UCharacterIterator.getInstance(text);
        prepared.append(prep.nfsmxp.prepare(whole, StringPrep.DEFAULT));
        return prepared.toString().getBytes("UTF-8");
    }

    /* special prefixes must not be followed by suffixes! */
    final String head = text.substring(0, atPos);
    final int specialIdx = findStringIndex(special_prefixes, head);
    final String tail = text.substring(atPos + 1);
    if(specialIdx > -1 && !tail.isEmpty()){
        throw new ParseException("Suffix following a special index", -1);
    }

    UCharacterIterator headIter = UCharacterIterator.getInstance(head);
    UCharacterIterator tailIter = UCharacterIterator.getInstance(tail);
    prepared.append(prep.nfsmxp.prepare(headIter, StringPrep.DEFAULT));
    prepared.append(AT_SIGN); // put the delimiter back between the two parts
    prepared.append(prep.nfsmxs.prepare(tailIter, StringPrep.DEFAULT));
    return prepared.toString().getBytes("UTF-8");
}

Source File: NFS4StringPrep.java From jdk8u-dev-jdk with GNU General Public License v2.0

6 votes

/**
 * Prepares a UTF-8 encoded "mixed" string. When the decoded text contains
 * AT_SIGN, the part before the first delimiter is prepared with the
 * nfsmxp profile and the part after it with the nfsmxs profile; otherwise the
 * whole text is prepared with nfsmxp.
 *
 * @param src the UTF-8 bytes of the string to prepare
 * @return the prepared string, encoded as UTF-8
 * @throws ParseException if a special prefix is followed by a non-empty suffix,
 *                        or if one of the profiles rejects its input
 */
public static byte[] mixed_prepare(byte[] src)
                     throws IOException, ParseException, UnsupportedEncodingException{
    final String text = new String(src, "UTF-8");
    final int atPos = text.indexOf(AT_SIGN);
    final StringBuffer prepared = new StringBuffer();

    if(atPos < 0){
        // no delimiter present: the whole text goes through the prefix profile
        UCharacterIterator whole = UCharacterIterator.getInstance(text);
        prepared.append(prep.nfsmxp.prepare(whole, StringPrep.DEFAULT));
        return prepared.toString().getBytes("UTF-8");
    }

    /* special prefixes must not be followed by suffixes! */
    final String head = text.substring(0, atPos);
    final int specialIdx = findStringIndex(special_prefixes, head);
    final String tail = text.substring(atPos + 1);
    if(specialIdx > -1 && !tail.isEmpty()){
        throw new ParseException("Suffix following a special index", -1);
    }

    UCharacterIterator headIter = UCharacterIterator.getInstance(head);
    UCharacterIterator tailIter = UCharacterIterator.getInstance(tail);
    prepared.append(prep.nfsmxp.prepare(headIter, StringPrep.DEFAULT));
    prepared.append(AT_SIGN); // put the delimiter back between the two parts
    prepared.append(prep.nfsmxs.prepare(tailIter, StringPrep.DEFAULT));
    return prepared.toString().getBytes("UTF-8");
}

Source File: NFS4StringPrep.java From openjdk-jdk8u-backup with GNU General Public License v2.0

6 votes

/**
 * Prepares a UTF-8 encoded "mixed" string. When the decoded text contains
 * AT_SIGN, the part before the first delimiter is prepared with the
 * nfsmxp profile and the part after it with the nfsmxs profile; otherwise the
 * whole text is prepared with nfsmxp.
 *
 * @param src the UTF-8 bytes of the string to prepare
 * @return the prepared string, encoded as UTF-8
 * @throws ParseException if a special prefix is followed by a non-empty suffix,
 *                        or if one of the profiles rejects its input
 */
public static byte[] mixed_prepare(byte[] src)
                     throws IOException, ParseException, UnsupportedEncodingException{
    final String text = new String(src, "UTF-8");
    final int atPos = text.indexOf(AT_SIGN);
    final StringBuffer prepared = new StringBuffer();

    if(atPos < 0){
        // no delimiter present: the whole text goes through the prefix profile
        UCharacterIterator whole = UCharacterIterator.getInstance(text);
        prepared.append(prep.nfsmxp.prepare(whole, StringPrep.DEFAULT));
        return prepared.toString().getBytes("UTF-8");
    }

    /* special prefixes must not be followed by suffixes! */
    final String head = text.substring(0, atPos);
    final int specialIdx = findStringIndex(special_prefixes, head);
    final String tail = text.substring(atPos + 1);
    if(specialIdx > -1 && !tail.isEmpty()){
        throw new ParseException("Suffix following a special index", -1);
    }

    UCharacterIterator headIter = UCharacterIterator.getInstance(head);
    UCharacterIterator tailIter = UCharacterIterator.getInstance(tail);
    prepared.append(prep.nfsmxp.prepare(headIter, StringPrep.DEFAULT));
    prepared.append(AT_SIGN); // put the delimiter back between the two parts
    prepared.append(prep.nfsmxs.prepare(tailIter, StringPrep.DEFAULT));
    return prepared.toString().getBytes("UTF-8");
}

Source File: TestStringPrep.java From dragonwell8_jdk with GNU General Public License v2.0

6 votes

/**
 * Runs every entry of TestData.conformanceTestCases through a StringPrep
 * built from the "uidna.spp" resource. A case fails when the prepared output
 * differs from the expected output (both non-null), or when a ParseException
 * is raised for a case whose {@code expected} field is null.
 *
 * @throws Exception if the profile cannot be loaded or a check fails
 */
public static void TestNamePrepConformance() throws Exception {
    final StringPrep namePrep;
    // try-with-resources: the profile stream is closed even when the
    // StringPrep constructor throws while reading it
    try (InputStream stream = StringPrep.class.getResourceAsStream("uidna.spp")) {
        namePrep = new StringPrep(stream);
    }

    int i; // declared outside the loop because the count is reported afterwards
    for(i=0; i<TestData.conformanceTestCases.length;i++){
        TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
        try{
            UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
            StringBuffer output = namePrep.prepare(iter, testCase.flags);
            // outputs are only compared when both sides are present
            if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
                fail("Did not get the expected output. Expected: " + prettify(testCase.output)+
                        " Got: "+ prettify(output.toString()) );
            }
        } catch(ParseException ex) {
            // an exception is only an error for cases that declare no expected failure
            if (testCase.expected == null) {
                fail("get the unexpected exception for source: " +testCase.input +" Got:  "+ ex.toString());
            }
        }
    }
    System.out.println("Nameprep test count: " + i);
}

Source File: TestStringPrep.java From openjdk-jdk8u-backup with GNU General Public License v2.0

6 votes

/**
 * Runs every entry of TestData.conformanceTestCases through a StringPrep
 * built from the "uidna.spp" resource. A case fails when the prepared output
 * differs from the expected output (both non-null), or when a ParseException
 * is raised for a case whose {@code expected} field is null.
 *
 * @throws Exception if the profile cannot be loaded or a check fails
 */
public static void TestNamePrepConformance() throws Exception {
    final StringPrep namePrep;
    // try-with-resources: the profile stream is closed even when the
    // StringPrep constructor throws while reading it
    try (InputStream stream = StringPrep.class.getResourceAsStream("uidna.spp")) {
        namePrep = new StringPrep(stream);
    }

    int i; // declared outside the loop because the count is reported afterwards
    for(i=0; i<TestData.conformanceTestCases.length;i++){
        TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
        try{
            UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
            StringBuffer output = namePrep.prepare(iter, testCase.flags);
            // outputs are only compared when both sides are present
            if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
                fail("Did not get the expected output. Expected: " + prettify(testCase.output)+
                        " Got: "+ prettify(output.toString()) );
            }
        } catch(ParseException ex) {
            // an exception is only an error for cases that declare no expected failure
            if (testCase.expected == null) {
                fail("get the unexpected exception for source: " +testCase.input +" Got:  "+ ex.toString());
            }
        }
    }
    System.out.println("Nameprep test count: " + i);
}

Source File: TestStringPrep.java From jdk8u60 with GNU General Public License v2.0

6 votes

/**
 * Runs every entry of TestData.conformanceTestCases through a StringPrep
 * built from the "uidna.spp" resource. A case fails when the prepared output
 * differs from the expected output (both non-null), or when a ParseException
 * is raised for a case whose {@code expected} field is null.
 *
 * @throws Exception if the profile cannot be loaded or a check fails
 */
public static void TestNamePrepConformance() throws Exception {
    final StringPrep namePrep;
    // try-with-resources: the profile stream is closed even when the
    // StringPrep constructor throws while reading it
    try (InputStream stream = StringPrep.class.getResourceAsStream("uidna.spp")) {
        namePrep = new StringPrep(stream);
    }

    int i; // declared outside the loop because the count is reported afterwards
    for(i=0; i<TestData.conformanceTestCases.length;i++){
        TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
        try{
            UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
            StringBuffer output = namePrep.prepare(iter, testCase.flags);
            // outputs are only compared when both sides are present
            if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
                fail("Did not get the expected output. Expected: " + prettify(testCase.output)+
                        " Got: "+ prettify(output.toString()) );
            }
        } catch(ParseException ex) {
            // an exception is only an error for cases that declare no expected failure
            if (testCase.expected == null) {
                fail("get the unexpected exception for source: " +testCase.input +" Got:  "+ ex.toString());
            }
        }
    }
    System.out.println("Nameprep test count: " + i);
}

Source File: NFS4StringPrep.java From jdk8u60 with GNU General Public License v2.0

6 votes

/**
 * Prepares a UTF-8 encoded "mixed" string. When the decoded text contains
 * AT_SIGN, the part before the first delimiter is prepared with the
 * nfsmxp profile and the part after it with the nfsmxs profile; otherwise the
 * whole text is prepared with nfsmxp.
 *
 * @param src the UTF-8 bytes of the string to prepare
 * @return the prepared string, encoded as UTF-8
 * @throws ParseException if a special prefix is followed by a non-empty suffix,
 *                        or if one of the profiles rejects its input
 */
public static byte[] mixed_prepare(byte[] src)
                     throws IOException, ParseException, UnsupportedEncodingException{
    final String text = new String(src, "UTF-8");
    final int atPos = text.indexOf(AT_SIGN);
    final StringBuffer prepared = new StringBuffer();

    if(atPos < 0){
        // no delimiter present: the whole text goes through the prefix profile
        UCharacterIterator whole = UCharacterIterator.getInstance(text);
        prepared.append(prep.nfsmxp.prepare(whole, StringPrep.DEFAULT));
        return prepared.toString().getBytes("UTF-8");
    }

    /* special prefixes must not be followed by suffixes! */
    final String head = text.substring(0, atPos);
    final int specialIdx = findStringIndex(special_prefixes, head);
    final String tail = text.substring(atPos + 1);
    if(specialIdx > -1 && !tail.isEmpty()){
        throw new ParseException("Suffix following a special index", -1);
    }

    UCharacterIterator headIter = UCharacterIterator.getInstance(head);
    UCharacterIterator tailIter = UCharacterIterator.getInstance(tail);
    prepared.append(prep.nfsmxp.prepare(headIter, StringPrep.DEFAULT));
    prepared.append(AT_SIGN); // put the delimiter back between the two parts
    prepared.append(prep.nfsmxs.prepare(tailIter, StringPrep.DEFAULT));
    return prepared.toString().getBytes("UTF-8");
}

Source File: TestStringPrep.java From jdk8u_jdk with GNU General Public License v2.0

6 votes

/**
 * Runs every entry of TestData.conformanceTestCases through a StringPrep
 * built from the "uidna.spp" resource. A case fails when the prepared output
 * differs from the expected output (both non-null), or when a ParseException
 * is raised for a case whose {@code expected} field is null.
 *
 * @throws Exception if the profile cannot be loaded or a check fails
 */
public static void TestNamePrepConformance() throws Exception {
    final StringPrep namePrep;
    // try-with-resources: the profile stream is closed even when the
    // StringPrep constructor throws while reading it
    try (InputStream stream = StringPrep.class.getResourceAsStream("uidna.spp")) {
        namePrep = new StringPrep(stream);
    }

    int i; // declared outside the loop because the count is reported afterwards
    for(i=0; i<TestData.conformanceTestCases.length;i++){
        TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
        try{
            UCharacterIterator iter = UCharacterIterator.getInstance(testCase.input);
            StringBuffer output = namePrep.prepare(iter, testCase.flags);
            // outputs are only compared when both sides are present
            if(testCase.output !=null && output!=null && !testCase.output.equals(output.toString())){
                fail("Did not get the expected output. Expected: " + prettify(testCase.output)+
                        " Got: "+ prettify(output.toString()) );
            }
        } catch(ParseException ex) {
            // an exception is only an error for cases that declare no expected failure
            if (testCase.expected == null) {
                fail("get the unexpected exception for source: " +testCase.input +" Got:  "+ ex.toString());
            }
        }
    }
    System.out.println("Nameprep test count: " + i);
}

Source File: NFS4StringPrep.java From dragonwell8_jdk with GNU General Public License v2.0

5 votes

/**
 * Decodes {@code src} as UTF-8, runs it through the given profile with
 * StringPrep.DEFAULT options, and re-encodes the result as UTF-8.
 *
 * @param src  the UTF-8 bytes to prepare
 * @param prep the profile to apply
 * @return the prepared text as UTF-8 bytes
 */
private static byte[] prepare(byte[] src, StringPrep prep)
            throws ParseException, UnsupportedEncodingException{
    final String decoded = new String(src, "UTF-8");
    final StringBuffer prepared =
            prep.prepare(UCharacterIterator.getInstance(decoded), StringPrep.DEFAULT);
    return prepared.toString().getBytes("UTF-8");
}

Source File: NFS4StringPrep.java From TencentKona-8 with GNU General Public License v2.0

5 votes

/**
 * Decodes {@code src} as UTF-8, runs it through the given profile with
 * StringPrep.DEFAULT options, and re-encodes the result as UTF-8.
 *
 * @param src  the UTF-8 bytes to prepare
 * @param prep the profile to apply
 * @return the prepared text as UTF-8 bytes
 */
private static byte[] prepare(byte[] src, StringPrep prep)
            throws ParseException, UnsupportedEncodingException{
    final String decoded = new String(src, "UTF-8");
    final StringBuffer prepared =
            prep.prepare(UCharacterIterator.getInstance(decoded), StringPrep.DEFAULT);
    return prepared.toString().getBytes("UTF-8");
}

Source File: NFS4StringPrep.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Decodes {@code src} as UTF-8, runs it through the given profile with
 * StringPrep.DEFAULT options, and re-encodes the result as UTF-8.
 *
 * @param src  the UTF-8 bytes to prepare
 * @param prep the profile to apply
 * @return the prepared text as UTF-8 bytes
 */
private static byte[] prepare(byte[] src, StringPrep prep)
            throws ParseException, UnsupportedEncodingException{
    final String decoded = new String(src, "UTF-8");
    final StringBuffer prepared =
            prep.prepare(UCharacterIterator.getInstance(decoded), StringPrep.DEFAULT);
    return prepared.toString().getBytes("UTF-8");
}

Source File: StringPrep.java From openjdk-8 with GNU General Public License v2.0

4 votes

/**
 * Runs the profile's processing steps over the source text: mapping, optional
 * NFKC normalization, a scan for prohibited code points and, when enabled for
 * this profile, the BiDi checks (the order follows RFC 3454).
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, passed on to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *                                  (NOTE(review): callers appear to pass StringPrep.DEFAULT;
 *                                  confirm the constant name against its declaration)
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the mapped (and, if enabled, normalized) text
 * @throws ParseException if a prohibited code point is found, or if BiDi checking is
 *                        enabled and the text breaks the BiDi rules
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile requests NFKC
    final StringBuffer mapped = map(src,options);
    final StringBuffer normOut = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    final Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "nothing seen yet" marker
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1;
    int ltrPos = -1;
    boolean sawRtl = false;
    boolean sawLtr = false;

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){
        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, val);

        if(val.type == PROHIBITED ){
            throw new ParseException("A prohibited code point was found in the input" +
                                     iter.getText(), val.value);
        }

        lastDir = UCharacter.getDirection(cp);
        if(firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstDir = lastDir;
        }
        if(lastDir == UCharacterDirection.LEFT_TO_RIGHT){
            sawLtr = true;
            ltrPos = iter.getIndex()-1;
        }
        if(lastDir == UCharacterDirection.RIGHT_TO_LEFT || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            sawRtl = true;
            rtlPos = iter.getIndex()-1;
        }
    }

    if(!checkBiDi){
        return normOut;
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed
    if(sawLtr && sawRtl){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }

    // satisfy 3: right-to-left text must both begin and end with a right-to-left character
    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
                           || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
                         || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    if(sawRtl && !(startsRtl && endsRtl)){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }
    return normOut;

  }

Source File: IDN.java From Bytecoder with Apache License 2.0

4 votes

/**
 * Converts a single label to its Unicode form. This method never fails: if
 * any step cannot be completed, the original {@code label} is returned.
 *
 * @param label the label to convert
 * @param flag  option bits forwarded to nameprep and to toASCII
 * @return the punycode-decoded label when it round-trips through toASCII,
 *         otherwise {@code label} unchanged
 */
private static String toUnicodeInternal(String label, int flag) {
    StringBuffer dest;

    // steps 1-2: an all-ASCII label is used as is; anything else goes
    // through nameprep with the caller's flag
    if (isAllASCII(label)) {
        dest = new StringBuffer(label);
    } else {
        try {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            dest = namePrep.prepare(iter, flag);
        } catch (Exception e) {
            // toUnicode never fails; if any step fails, return the input string
            return label;
        }
    }

    // step 3: without the ACE prefix there is nothing to decode
    if (!startsWithACEPrefix(dest)) {
        return label;
    }

    // step 4: remove the ACE prefix
    String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());

    try {
        // step 5: decode using punycode (no case flags are requested)
        StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);

        // step 6: apply toASCII to the decoded text
        String toASCIIOut = toASCII(decodeOut.toString(), flag);

        // steps 7-8: accept the decoded text only if it round-trips to the
        // same ASCII form, ignoring case
        if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
            return decodeOut.toString();
        }
    } catch (Exception ignored) {
        // deliberate best effort: fall through and return the input
    }

    // just return the input
    return label;
}

Source File: StringPrep.java From openjdk-jdk9 with GNU General Public License v2.0

4 votes

/**
 * Runs the profile's processing steps over the source text: mapping, optional
 * NFKC normalization, a scan for prohibited code points and, when enabled for
 * this profile, the BiDi checks (the order follows RFC 3454).
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, passed on to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *                                  (NOTE(review): callers appear to pass StringPrep.DEFAULT;
 *                                  confirm the constant name against its declaration)
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the mapped (and, if enabled, normalized) text
 * @throws ParseException if a prohibited code point is found, or if BiDi checking is
 *                        enabled and the text breaks the BiDi rules
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile requests NFKC
    final StringBuffer mapped = map(src,options);
    final StringBuffer normOut = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    final Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "nothing seen yet" marker
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1;
    int ltrPos = -1;
    boolean sawRtl = false;
    boolean sawLtr = false;

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){
        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, val);

        if(val.type == PROHIBITED ){
            throw new ParseException("A prohibited code point was found in the input" +
                                     iter.getText(), val.value);
        }

        lastDir = UCharacter.getDirection(cp);
        if(firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstDir = lastDir;
        }
        if(lastDir == UCharacterDirection.LEFT_TO_RIGHT){
            sawLtr = true;
            ltrPos = iter.getIndex()-1;
        }
        if(lastDir == UCharacterDirection.RIGHT_TO_LEFT || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            sawRtl = true;
            rtlPos = iter.getIndex()-1;
        }
    }

    if(!checkBiDi){
        return normOut;
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed
    if(sawLtr && sawRtl){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }

    // satisfy 3: right-to-left text must both begin and end with a right-to-left character
    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
                           || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
                         || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    if(sawRtl && !(startsRtl && endsRtl)){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }
    return normOut;

  }

Source File: IDN.java From jdk8u60 with GNU General Public License v2.0

4 votes

/**
 * Converts a single label to its Unicode form. This method never fails: if
 * any step cannot be completed, the original {@code label} is returned.
 *
 * @param label the label to convert
 * @param flag  option bits forwarded to nameprep and to toASCII
 * @return the punycode-decoded label when it round-trips through toASCII,
 *         otherwise {@code label} unchanged
 */
private static String toUnicodeInternal(String label, int flag) {
    StringBuffer dest;

    // steps 1-2: an all-ASCII label is used as is; anything else goes
    // through nameprep with the caller's flag
    if (isAllASCII(label)) {
        dest = new StringBuffer(label);
    } else {
        try {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            dest = namePrep.prepare(iter, flag);
        } catch (Exception e) {
            // toUnicode never fails; if any step fails, return the input string
            return label;
        }
    }

    // step 3: without the ACE prefix there is nothing to decode
    if (!startsWithACEPrefix(dest)) {
        return label;
    }

    // step 4: remove the ACE prefix
    String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());

    try {
        // step 5: decode using punycode (no case flags are requested)
        StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);

        // step 6: apply toASCII to the decoded text
        String toASCIIOut = toASCII(decodeOut.toString(), flag);

        // steps 7-8: accept the decoded text only if it round-trips to the
        // same ASCII form, ignoring case
        if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
            return decodeOut.toString();
        }
    } catch (Exception ignored) {
        // deliberate best effort: fall through and return the input
    }

    // just return the input
    return label;
}

Source File: IDN.java From Java8CN with Apache License 2.0

4 votes

/**
 * Converts a single label to its Unicode form. This method never fails: if
 * any step cannot be completed, the original {@code label} is returned.
 *
 * @param label the label to convert
 * @param flag  option bits forwarded to nameprep and to toASCII
 * @return the punycode-decoded label when it round-trips through toASCII,
 *         otherwise {@code label} unchanged
 */
private static String toUnicodeInternal(String label, int flag) {
    StringBuffer dest;

    // steps 1-2: an all-ASCII label is used as is; anything else goes
    // through nameprep with the caller's flag
    if (isAllASCII(label)) {
        dest = new StringBuffer(label);
    } else {
        try {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            dest = namePrep.prepare(iter, flag);
        } catch (Exception e) {
            // toUnicode never fails; if any step fails, return the input string
            return label;
        }
    }

    // step 3: without the ACE prefix there is nothing to decode
    if (!startsWithACEPrefix(dest)) {
        return label;
    }

    // step 4: remove the ACE prefix
    String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());

    try {
        // step 5: decode using punycode (no case flags are requested)
        StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);

        // step 6: apply toASCII to the decoded text
        String toASCIIOut = toASCII(decodeOut.toString(), flag);

        // steps 7-8: accept the decoded text only if it round-trips to the
        // same ASCII form, ignoring case
        if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
            return decodeOut.toString();
        }
    } catch (Exception ignored) {
        // deliberate best effort: fall through and return the input
    }

    // just return the input
    return label;
}

Source File: StringPrep.java From TencentKona-8 with GNU General Public License v2.0

4 votes

/**
 * Runs the profile's processing steps over the source text: mapping, optional
 * NFKC normalization, a scan for prohibited code points and, when enabled for
 * this profile, the BiDi checks (the order follows RFC 3454).
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, passed on to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *                                  (NOTE(review): callers appear to pass StringPrep.DEFAULT;
 *                                  confirm the constant name against its declaration)
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the mapped (and, if enabled, normalized) text
 * @throws ParseException if a prohibited code point is found, or if BiDi checking is
 *                        enabled and the text breaks the BiDi rules
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile requests NFKC
    final StringBuffer mapped = map(src,options);
    final StringBuffer normOut = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    final Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "nothing seen yet" marker
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1;
    int ltrPos = -1;
    boolean sawRtl = false;
    boolean sawLtr = false;

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){
        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, val);

        if(val.type == PROHIBITED ){
            throw new ParseException("A prohibited code point was found in the input" +
                                     iter.getText(), val.value);
        }

        lastDir = UCharacter.getDirection(cp);
        if(firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstDir = lastDir;
        }
        if(lastDir == UCharacterDirection.LEFT_TO_RIGHT){
            sawLtr = true;
            ltrPos = iter.getIndex()-1;
        }
        if(lastDir == UCharacterDirection.RIGHT_TO_LEFT || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            sawRtl = true;
            rtlPos = iter.getIndex()-1;
        }
    }

    if(!checkBiDi){
        return normOut;
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed
    if(sawLtr && sawRtl){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }

    // satisfy 3: right-to-left text must both begin and end with a right-to-left character
    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
                           || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
                         || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    if(sawRtl && !(startsRtl && endsRtl)){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }
    return normOut;

  }

Source File: IDN.java From openjdk-8-source with GNU General Public License v2.0

4 votes

/**
 * Converts a single label to its Unicode form. This method never fails: if
 * any step cannot be completed, the original {@code label} is returned.
 *
 * @param label the label to convert
 * @param flag  option bits forwarded to nameprep and to toASCII
 * @return the punycode-decoded label when it round-trips through toASCII,
 *         otherwise {@code label} unchanged
 */
private static String toUnicodeInternal(String label, int flag) {
    StringBuffer dest;

    // steps 1-2: an all-ASCII label is used as is; anything else goes
    // through nameprep with the caller's flag
    if (isAllASCII(label)) {
        dest = new StringBuffer(label);
    } else {
        try {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            dest = namePrep.prepare(iter, flag);
        } catch (Exception e) {
            // toUnicode never fails; if any step fails, return the input string
            return label;
        }
    }

    // step 3: without the ACE prefix there is nothing to decode
    if (!startsWithACEPrefix(dest)) {
        return label;
    }

    // step 4: remove the ACE prefix
    String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());

    try {
        // step 5: decode using punycode (no case flags are requested)
        StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);

        // step 6: apply toASCII to the decoded text
        String toASCIIOut = toASCII(decodeOut.toString(), flag);

        // steps 7-8: accept the decoded text only if it round-trips to the
        // same ASCII form, ignoring case
        if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
            return decodeOut.toString();
        }
    } catch (Exception ignored) {
        // deliberate best effort: fall through and return the input
    }

    // just return the input
    return label;
}

Source File: StringPrep.java From dragonwell8_jdk with GNU General Public License v2.0

4 votes

/**
 * Runs the profile's processing steps over the source text: mapping, optional
 * NFKC normalization, a scan for prohibited code points and, when enabled for
 * this profile, the BiDi checks (the order follows RFC 3454).
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, passed on to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *                                  (NOTE(review): callers appear to pass StringPrep.DEFAULT;
 *                                  confirm the constant name against its declaration)
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the mapped (and, if enabled, normalized) text
 * @throws ParseException if a prohibited code point is found, or if BiDi checking is
 *                        enabled and the text breaks the BiDi rules
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile requests NFKC
    final StringBuffer mapped = map(src,options);
    final StringBuffer normOut = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    final Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "nothing seen yet" marker
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1;
    int ltrPos = -1;
    boolean sawRtl = false;
    boolean sawLtr = false;

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){
        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, val);

        if(val.type == PROHIBITED ){
            throw new ParseException("A prohibited code point was found in the input" +
                                     iter.getText(), val.value);
        }

        lastDir = UCharacter.getDirection(cp);
        if(firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstDir = lastDir;
        }
        if(lastDir == UCharacterDirection.LEFT_TO_RIGHT){
            sawLtr = true;
            ltrPos = iter.getIndex()-1;
        }
        if(lastDir == UCharacterDirection.RIGHT_TO_LEFT || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            sawRtl = true;
            rtlPos = iter.getIndex()-1;
        }
    }

    if(!checkBiDi){
        return normOut;
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed
    if(sawLtr && sawRtl){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }

    // satisfy 3: right-to-left text must both begin and end with a right-to-left character
    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
                           || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
                         || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    if(sawRtl && !(startsRtl && endsRtl)){
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 iter.getText(),
                                 Math.max(rtlPos, ltrPos));
    }
    return normOut;

  }

Source File: IDN.java From openjdk-8 with GNU General Public License v2.0

4 votes

/**
 * Converts a single label to its Unicode form. This method never fails: if
 * any step cannot be completed, the original {@code label} is returned.
 *
 * @param label the label to convert
 * @param flag  option bits forwarded to nameprep and to toASCII
 * @return the punycode-decoded label when it round-trips through toASCII,
 *         otherwise {@code label} unchanged
 */
private static String toUnicodeInternal(String label, int flag) {
    StringBuffer dest;

    // steps 1-2: an all-ASCII label is used as is; anything else goes
    // through nameprep with the caller's flag
    if (isAllASCII(label)) {
        dest = new StringBuffer(label);
    } else {
        try {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            dest = namePrep.prepare(iter, flag);
        } catch (Exception e) {
            // toUnicode never fails; if any step fails, return the input string
            return label;
        }
    }

    // step 3: without the ACE prefix there is nothing to decode
    if (!startsWithACEPrefix(dest)) {
        return label;
    }

    // step 4: remove the ACE prefix
    String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());

    try {
        // step 5: decode using punycode (no case flags are requested)
        StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);

        // step 6: apply toASCII to the decoded text
        String toASCIIOut = toASCII(decodeOut.toString(), flag);

        // steps 7-8: accept the decoded text only if it round-trips to the
        // same ASCII form, ignoring case
        if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
            return decodeOut.toString();
        }
    } catch (Exception ignored) {
        // deliberate best effort: fall through and return the input
    }

    // just return the input
    return label;
}

Java Code Examples for sun.text.normalizer.UCharacterIterator#getInstance()