Java Code Examples for sun.text.normalizer.UCharacterIterator#getIndex()

The following examples show how to use sun.text.normalizer.UCharacterIterator#getIndex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StringPrep.java    From openjdk-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 2
Source File: StringPrep.java    From openjdk-8-source with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 3
Source File: StringPrep.java    From openjdk-8-source with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Applies the profile's mapping step to every code point read from {@code iter}.
 * Each code point is looked up via getCodePointValue/getValues and is then copied
 * unchanged, shifted by a delta, replaced by a run of chars taken from mappingData,
 * or dropped.
 *
 * @param iter    iterator over the source text; it is consumed up to DONE
 * @param options bit set; when ALLOW_UNASSIGNED is not set, an unassigned code point
 *                is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and ALLOW_UNASSIGNED is
 *                        not set; the offset is the iterator index at that moment
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final Values props = new Values();
    final StringBuffer out = new StringBuffer();
    final boolean unassignedOk = ((options & ALLOW_UNASSIGNED) > 0);

    for (int cp = iter.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = iter.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);

        // reject unassigned code points unless the caller opted in
        if (props.type == UNASSIGNED && !unassignedOk) {
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        int mappedCp = cp;
        if (props.type == MAP) {
            if (props.isIndex) {
                // the value is an offset into mappingData; the index range it falls in
                // fixes the run length, otherwise the length is stored at that offset
                int start = props.value;
                final int runLength;
                if (start >= indexes[ONE_UCHAR_MAPPING_INDEX_START]
                        && start < indexes[TWO_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 1;
                } else if (start >= indexes[TWO_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[THREE_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 2;
                } else if (start >= indexes[THREE_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 3;
                } else {
                    runLength = mappingData[start];
                    start++;
                }
                /* copy mapping to destination */
                out.append(mappingData, start, runLength);
                continue;
            }
            // the value is a delta to subtract from the code point
            mappedCp = cp - props.value;
        } else if (props.type == DELETE) {
            // the code point is dropped from the output
            continue;
        }

        // copy the (possibly shifted) code point into the destination
        UTF16.append(out, mappedCp);
    }

    return out;
}
 
Example 4
Source File: StringPrep.java    From hottub with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 5
Source File: StringPrep.java    From hottub with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Applies the profile's mapping step to every code point read from {@code iter}.
 * Each code point is looked up via getCodePointValue/getValues and is then copied
 * unchanged, shifted by a delta, replaced by a run of chars taken from mappingData,
 * or dropped.
 *
 * @param iter    iterator over the source text; it is consumed up to DONE
 * @param options bit set; when ALLOW_UNASSIGNED is not set, an unassigned code point
 *                is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and ALLOW_UNASSIGNED is
 *                        not set; the offset is the iterator index at that moment
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final Values props = new Values();
    final StringBuffer out = new StringBuffer();
    final boolean unassignedOk = ((options & ALLOW_UNASSIGNED) > 0);

    for (int cp = iter.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = iter.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);

        // reject unassigned code points unless the caller opted in
        if (props.type == UNASSIGNED && !unassignedOk) {
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        int mappedCp = cp;
        if (props.type == MAP) {
            if (props.isIndex) {
                // the value is an offset into mappingData; the index range it falls in
                // fixes the run length, otherwise the length is stored at that offset
                int start = props.value;
                final int runLength;
                if (start >= indexes[ONE_UCHAR_MAPPING_INDEX_START]
                        && start < indexes[TWO_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 1;
                } else if (start >= indexes[TWO_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[THREE_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 2;
                } else if (start >= indexes[THREE_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 3;
                } else {
                    runLength = mappingData[start];
                    start++;
                }
                /* copy mapping to destination */
                out.append(mappingData, start, runLength);
                continue;
            }
            // the value is a delta to subtract from the code point
            mappedCp = cp - props.value;
        } else if (props.type == DELETE) {
            // the code point is dropped from the output
            continue;
        }

        // copy the (possibly shifted) code point into the destination
        UTF16.append(out, mappedCp);
    }

    return out;
}
 
Example 6
Source File: StringPrep.java    From jdk8u-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 7
Source File: StringPrep.java    From jdk8u_jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Applies the profile's mapping step to every code point read from {@code iter}.
 * Each code point is looked up via getCodePointValue/getValues and is then copied
 * unchanged, shifted by a delta, replaced by a run of chars taken from mappingData,
 * or dropped.
 *
 * @param iter    iterator over the source text; it is consumed up to DONE
 * @param options bit set; when ALLOW_UNASSIGNED is not set, an unassigned code point
 *                is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and ALLOW_UNASSIGNED is
 *                        not set; the offset is the iterator index at that moment
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final Values props = new Values();
    final StringBuffer out = new StringBuffer();
    final boolean unassignedOk = ((options & ALLOW_UNASSIGNED) > 0);

    for (int cp = iter.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = iter.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);

        // reject unassigned code points unless the caller opted in
        if (props.type == UNASSIGNED && !unassignedOk) {
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        int mappedCp = cp;
        if (props.type == MAP) {
            if (props.isIndex) {
                // the value is an offset into mappingData; the index range it falls in
                // fixes the run length, otherwise the length is stored at that offset
                int start = props.value;
                final int runLength;
                if (start >= indexes[ONE_UCHAR_MAPPING_INDEX_START]
                        && start < indexes[TWO_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 1;
                } else if (start >= indexes[TWO_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[THREE_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 2;
                } else if (start >= indexes[THREE_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 3;
                } else {
                    runLength = mappingData[start];
                    start++;
                }
                /* copy mapping to destination */
                out.append(mappingData, start, runLength);
                continue;
            }
            // the value is a delta to subtract from the code point
            mappedCp = cp - props.value;
        } else if (props.type == DELETE) {
            // the code point is dropped from the output
            continue;
        }

        // copy the (possibly shifted) code point into the destination
        UTF16.append(out, mappedCp);
    }

    return out;
}
 
Example 8
Source File: StringPrep.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 9
Source File: StringPrep.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Applies the profile's mapping step to every code point read from {@code iter}.
 * Each code point is looked up via getCodePointValue/getValues and is then copied
 * unchanged, shifted by a delta, replaced by a run of chars taken from mappingData,
 * or dropped.
 *
 * @param iter    iterator over the source text; it is consumed up to DONE
 * @param options bit set; when ALLOW_UNASSIGNED is not set, an unassigned code point
 *                is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and ALLOW_UNASSIGNED is
 *                        not set; the offset is the iterator index at that moment
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final Values props = new Values();
    final StringBuffer out = new StringBuffer();
    final boolean unassignedOk = ((options & ALLOW_UNASSIGNED) > 0);

    for (int cp = iter.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = iter.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);

        // reject unassigned code points unless the caller opted in
        if (props.type == UNASSIGNED && !unassignedOk) {
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        int mappedCp = cp;
        if (props.type == MAP) {
            if (props.isIndex) {
                // the value is an offset into mappingData; the index range it falls in
                // fixes the run length, otherwise the length is stored at that offset
                int start = props.value;
                final int runLength;
                if (start >= indexes[ONE_UCHAR_MAPPING_INDEX_START]
                        && start < indexes[TWO_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 1;
                } else if (start >= indexes[TWO_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[THREE_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 2;
                } else if (start >= indexes[THREE_UCHARS_MAPPING_INDEX_START]
                        && start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]) {
                    runLength = 3;
                } else {
                    runLength = mappingData[start];
                    start++;
                }
                /* copy mapping to destination */
                out.append(mappingData, start, runLength);
                continue;
            }
            // the value is a delta to subtract from the code point
            mappedCp = cp - props.value;
        } else if (props.type == DELETE) {
            // the code point is dropped from the output
            continue;
        }

        // copy the (possibly shifted) code point into the destination
        UTF16.append(out, mappedCp);
    }

    return out;
}
 
Example 10
Source File: StringPrep.java    From Bytecoder with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 11
Source File: StringPrep.java    From jdk8u-dev-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * optionally normalized (NFKC), and then checked for prohibited and BiDi characters, in the
 * order defined by RFC 3454, depending on the options specified in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options, handed unchanged to the mapping step:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output: the mapped text, normalized
 *                      when doNFKC is set
 * @throws ParseException if a prohibited code point is found, or if checkBiDi is set and the
 *                        text either mixes left-to-right with right-to-left code points or
 *                        contains right-to-left code points without both starting and ending
 *                        with one; may also propagate from the mapping step
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map first, then normalize only when the profile asks for it
    final StringBuffer mapped = map(src, options);
    final StringBuffer prepared = doNFKC ? normalize(mapped) : mapped;

    final UCharacterIterator scan = UCharacterIterator.getInstance(prepared);
    final Values props = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" sentinel
    int firstDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int lastLtrAt = -1;
    int lastRtlAt = -1;
    boolean sawLtr = false;
    boolean sawRtl = false;

    for (int cp = scan.nextCodePoint();
         cp != UCharacterIterator.DONE;
         cp = scan.nextCodePoint()) {

        final char trieWord = getCodePointValue(cp);
        getValues(trieWord, props);
        if (props.type == PROHIBITED) {
            throw new ParseException("A prohibited code point was found in the input" +
                                     scan.getText(), props.value);
        }

        // after the loop, lastDir holds the direction of the final code point
        lastDir = UCharacter.getDirection(cp);
        if (firstDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
            firstDir = lastDir;
        }
        if (lastDir == UCharacterDirection.LEFT_TO_RIGHT) {
            sawLtr = true;
            lastLtrAt = scan.getIndex() - 1;
        }
        if (lastDir == UCharacterDirection.RIGHT_TO_LEFT
                || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
            sawRtl = true;
            lastRtlAt = scan.getIndex() - 1;
        }
    }

    if (!checkBiDi) {
        return prepared;
    }

    final boolean startsRtl = firstDir == UCharacterDirection.RIGHT_TO_LEFT
            || firstDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    final boolean endsRtl = lastDir == UCharacterDirection.RIGHT_TO_LEFT
            || lastDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    final boolean mixedDirections = sawLtr && sawRtl;
    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    final boolean unanchoredRtl = sawRtl && !(startsRtl && endsRtl);

    if (mixedDirections || unanchoredRtl) {
        // the reported offset is the later of the last LTR / last RTL positions recorded
        throw new ParseException("The input does not conform to the rules for BiDi code points." +
                                 scan.getText(),
                                 Math.max(lastRtlAt, lastLtrAt));
    }
    return prepared;
}
 
Example 12
Source File: StringPrep.java    From openjdk-jdk8u-backup with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * normalized (NFKC) when the profile requests it, and then checked for prohibited and
 * BiDi code points in the order defined by RFC 3454, depending on the options specified
 * in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output
 * @throws ParseException if a prohibited code point is found (the error offset is the
 *                        index at which that code point starts in the mapped and
 *                        normalized text), or if BiDi checking is enabled and the text
 *                        violates the BiDi rules (the error offset is the start index
 *                        of the last left-to-right or right-to-left code point seen)
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map
    StringBuffer mapOut = map(src,options);
    // normalize only when the profile asks for it
    StringBuffer normOut = doNFKC ? normalize(mapOut) : mapOut;

    UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" marker
    int direction = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1, ltrPos = -1;
    boolean rightToLeft = false, leftToRight = false;

    int ch;
    while((ch = iter.nextCodePoint()) != UCharacterIterator.DONE){
        // nextCodePoint() has already moved past ch; step back by its width in chars
        // so that supplementary code points report their first char, not their second
        int chStart = iter.getIndex() - Character.charCount(ch);

        getValues(getCodePointValue(ch), val);
        if(val.type == PROHIBITED){
            // report where the code point sits in the text; val.value is lookup data,
            // not a position
            throw new ParseException("A prohibited code point was found in the input " +
                                     iter.getText(), chStart);
        }

        direction = UCharacter.getDirection(ch);
        if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == UCharacterDirection.LEFT_TO_RIGHT){
            leftToRight = true;
            ltrPos = chStart;
        }
        if(direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            rightToLeft = true;
            rtlPos = chStart;
        }
    }

    if(checkBiDi){
        // after the loop, direction holds the direction of the last code point
        boolean firstIsRtl = firstCharDir == UCharacterDirection.RIGHT_TO_LEFT
                || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
        boolean lastIsRtl = direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;

        // satisfy 2: right-to-left text must not contain left-to-right code points
        // satisfy 3: right-to-left text must both start and end with a right-to-left code point
        if(rightToLeft && (leftToRight || !(firstIsRtl && lastIsRtl))){
            throw new ParseException("The input does not conform to the rules for BiDi code points. " +
                                     iter.getText(),
                                     Math.max(rtlPos, ltrPos));
        }
    }
    return normOut;
}
 
Example 13
Source File: StringPrep.java    From openjdk-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs the mapping step over the input: every code point is looked up and then either
 * copied through, replaced by its mapping, or dropped.
 *
 * @param iter     iterator over the source text; read until it reports DONE
 * @param options  bit set of options; unless ALLOW_UNASSIGNED is set, an unassigned
 *                 code point is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and not allowed
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final boolean allowUnassigned = (options & ALLOW_UNASSIGNED) > 0;
    final Values lookup = new Values();
    final StringBuffer mapped = new StringBuffer();

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){

        getValues(getCodePointValue(cp), lookup);

        // reject unassigned code points unless the caller opted in
        if(lookup.type == UNASSIGNED && !allowUnassigned){
            // NOTE(review): getIndex() is read after nextCodePoint() consumed the code
            // point, so this offset presumably points just past it - confirm intended
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        if(lookup.type == MAP){
            if(lookup.isIndex){
                // the value is an offset into mappingData; the range it falls in
                // decides how many chars the mapping has
                final int start = lookup.value;
                int offset = start;
                final int count;

                if(start >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
                        start < indexes[TWO_UCHARS_MAPPING_INDEX_START]){
                    count = 1;
                }else if(start >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[THREE_UCHARS_MAPPING_INDEX_START]){
                    count = 2;
                }else if(start >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){
                    count = 3;
                }else{
                    // length is stored in the table itself; the mapping follows it
                    count = mappingData[start];
                    offset = start + 1;
                }

                /* copy mapping to destination */
                mapped.append(mappingData, offset, count);
                continue;
            }
            // not an index: the value is subtracted from the code point
            cp -= lookup.value;
        }else if(lookup.type == DELETE){
            // just consume the code point and continue
            continue;
        }

        // copy the (possibly adjusted) code point into the destination
        UTF16.append(mapped, cp);
    }

    return mapped;
}
 
Example 14
Source File: StringPrep.java    From openjdk-jdk8u with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * normalized (NFKC) when the profile requests it, and then checked for prohibited and
 * BiDi code points in the order defined by RFC 3454, depending on the options specified
 * in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output
 * @throws ParseException if a prohibited code point is found (the error offset is the
 *                        index at which that code point starts in the mapped and
 *                        normalized text), or if BiDi checking is enabled and the text
 *                        violates the BiDi rules (the error offset is the start index
 *                        of the last left-to-right or right-to-left code point seen)
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map
    StringBuffer mapOut = map(src,options);
    // normalize only when the profile asks for it
    StringBuffer normOut = doNFKC ? normalize(mapOut) : mapOut;

    UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" marker
    int direction = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1, ltrPos = -1;
    boolean rightToLeft = false, leftToRight = false;

    int ch;
    while((ch = iter.nextCodePoint()) != UCharacterIterator.DONE){
        // nextCodePoint() has already moved past ch; step back by its width in chars
        // so that supplementary code points report their first char, not their second
        int chStart = iter.getIndex() - Character.charCount(ch);

        getValues(getCodePointValue(ch), val);
        if(val.type == PROHIBITED){
            // report where the code point sits in the text; val.value is lookup data,
            // not a position
            throw new ParseException("A prohibited code point was found in the input " +
                                     iter.getText(), chStart);
        }

        direction = UCharacter.getDirection(ch);
        if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == UCharacterDirection.LEFT_TO_RIGHT){
            leftToRight = true;
            ltrPos = chStart;
        }
        if(direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            rightToLeft = true;
            rtlPos = chStart;
        }
    }

    if(checkBiDi){
        // after the loop, direction holds the direction of the last code point
        boolean firstIsRtl = firstCharDir == UCharacterDirection.RIGHT_TO_LEFT
                || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
        boolean lastIsRtl = direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;

        // satisfy 2: right-to-left text must not contain left-to-right code points
        // satisfy 3: right-to-left text must both start and end with a right-to-left code point
        if(rightToLeft && (leftToRight || !(firstIsRtl && lastIsRtl))){
            throw new ParseException("The input does not conform to the rules for BiDi code points. " +
                                     iter.getText(),
                                     Math.max(rtlPos, ltrPos));
        }
    }
    return normOut;
}
 
Example 15
Source File: StringPrep.java    From jdk8u-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs the mapping step over the input: every code point is looked up and then either
 * copied through, replaced by its mapping, or dropped.
 *
 * @param iter     iterator over the source text; read until it reports DONE
 * @param options  bit set of options; unless ALLOW_UNASSIGNED is set, an unassigned
 *                 code point is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and not allowed
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final boolean allowUnassigned = (options & ALLOW_UNASSIGNED) > 0;
    final Values lookup = new Values();
    final StringBuffer mapped = new StringBuffer();

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){

        getValues(getCodePointValue(cp), lookup);

        // reject unassigned code points unless the caller opted in
        if(lookup.type == UNASSIGNED && !allowUnassigned){
            // NOTE(review): getIndex() is read after nextCodePoint() consumed the code
            // point, so this offset presumably points just past it - confirm intended
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        if(lookup.type == MAP){
            if(lookup.isIndex){
                // the value is an offset into mappingData; the range it falls in
                // decides how many chars the mapping has
                final int start = lookup.value;
                int offset = start;
                final int count;

                if(start >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
                        start < indexes[TWO_UCHARS_MAPPING_INDEX_START]){
                    count = 1;
                }else if(start >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[THREE_UCHARS_MAPPING_INDEX_START]){
                    count = 2;
                }else if(start >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){
                    count = 3;
                }else{
                    // length is stored in the table itself; the mapping follows it
                    count = mappingData[start];
                    offset = start + 1;
                }

                /* copy mapping to destination */
                mapped.append(mappingData, offset, count);
                continue;
            }
            // not an index: the value is subtracted from the code point
            cp -= lookup.value;
        }else if(lookup.type == DELETE){
            // just consume the code point and continue
            continue;
        }

        // copy the (possibly adjusted) code point into the destination
        UTF16.append(mapped, cp);
    }

    return mapped;
}
 
Example 16
Source File: StringPrep.java    From jdk8u_jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * normalized (NFKC) when the profile requests it, and then checked for prohibited and
 * BiDi code points in the order defined by RFC 3454, depending on the options specified
 * in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output
 * @throws ParseException if a prohibited code point is found (the error offset is the
 *                        index at which that code point starts in the mapped and
 *                        normalized text), or if BiDi checking is enabled and the text
 *                        violates the BiDi rules (the error offset is the start index
 *                        of the last left-to-right or right-to-left code point seen)
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map
    StringBuffer mapOut = map(src,options);
    // normalize only when the profile asks for it
    StringBuffer normOut = doNFKC ? normalize(mapOut) : mapOut;

    UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" marker
    int direction = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1, ltrPos = -1;
    boolean rightToLeft = false, leftToRight = false;

    int ch;
    while((ch = iter.nextCodePoint()) != UCharacterIterator.DONE){
        // nextCodePoint() has already moved past ch; step back by its width in chars
        // so that supplementary code points report their first char, not their second
        int chStart = iter.getIndex() - Character.charCount(ch);

        getValues(getCodePointValue(ch), val);
        if(val.type == PROHIBITED){
            // report where the code point sits in the text; val.value is lookup data,
            // not a position
            throw new ParseException("A prohibited code point was found in the input " +
                                     iter.getText(), chStart);
        }

        direction = UCharacter.getDirection(ch);
        if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == UCharacterDirection.LEFT_TO_RIGHT){
            leftToRight = true;
            ltrPos = chStart;
        }
        if(direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            rightToLeft = true;
            rtlPos = chStart;
        }
    }

    if(checkBiDi){
        // after the loop, direction holds the direction of the last code point
        boolean firstIsRtl = firstCharDir == UCharacterDirection.RIGHT_TO_LEFT
                || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
        boolean lastIsRtl = direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;

        // satisfy 2: right-to-left text must not contain left-to-right code points
        // satisfy 3: right-to-left text must both start and end with a right-to-left code point
        if(rightToLeft && (leftToRight || !(firstIsRtl && lastIsRtl))){
            throw new ParseException("The input does not conform to the rules for BiDi code points. " +
                                     iter.getText(),
                                     Math.max(rtlPos, ltrPos));
        }
    }
    return normOut;
}
 
Example 17
Source File: StringPrep.java    From jdk8u60 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs the mapping step over the input: every code point is looked up and then either
 * copied through, replaced by its mapping, or dropped.
 *
 * @param iter     iterator over the source text; read until it reports DONE
 * @param options  bit set of options; unless ALLOW_UNASSIGNED is set, an unassigned
 *                 code point is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and not allowed
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final boolean allowUnassigned = (options & ALLOW_UNASSIGNED) > 0;
    final Values lookup = new Values();
    final StringBuffer mapped = new StringBuffer();

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){

        getValues(getCodePointValue(cp), lookup);

        // reject unassigned code points unless the caller opted in
        if(lookup.type == UNASSIGNED && !allowUnassigned){
            // NOTE(review): getIndex() is read after nextCodePoint() consumed the code
            // point, so this offset presumably points just past it - confirm intended
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        if(lookup.type == MAP){
            if(lookup.isIndex){
                // the value is an offset into mappingData; the range it falls in
                // decides how many chars the mapping has
                final int start = lookup.value;
                int offset = start;
                final int count;

                if(start >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
                        start < indexes[TWO_UCHARS_MAPPING_INDEX_START]){
                    count = 1;
                }else if(start >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[THREE_UCHARS_MAPPING_INDEX_START]){
                    count = 2;
                }else if(start >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){
                    count = 3;
                }else{
                    // length is stored in the table itself; the mapping follows it
                    count = mappingData[start];
                    offset = start + 1;
                }

                /* copy mapping to destination */
                mapped.append(mappingData, offset, count);
                continue;
            }
            // not an index: the value is subtracted from the code point
            cp -= lookup.value;
        }else if(lookup.type == DELETE){
            // just consume the code point and continue
            continue;
        }

        // copy the (possibly adjusted) code point into the destination
        UTF16.append(mapped, cp);
    }

    return mapped;
}
 
Example 18
Source File: StringPrep.java    From TencentKona-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * normalized (NFKC) when the profile requests it, and then checked for prohibited and
 * BiDi code points in the order defined by RFC 3454, depending on the options specified
 * in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output
 * @throws ParseException if a prohibited code point is found (the error offset is the
 *                        index at which that code point starts in the mapped and
 *                        normalized text), or if BiDi checking is enabled and the text
 *                        violates the BiDi rules (the error offset is the start index
 *                        of the last left-to-right or right-to-left code point seen)
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map
    StringBuffer mapOut = map(src,options);
    // normalize only when the profile asks for it
    StringBuffer normOut = doNFKC ? normalize(mapOut) : mapOut;

    UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" marker
    int direction = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1, ltrPos = -1;
    boolean rightToLeft = false, leftToRight = false;

    int ch;
    while((ch = iter.nextCodePoint()) != UCharacterIterator.DONE){
        // nextCodePoint() has already moved past ch; step back by its width in chars
        // so that supplementary code points report their first char, not their second
        int chStart = iter.getIndex() - Character.charCount(ch);

        getValues(getCodePointValue(ch), val);
        if(val.type == PROHIBITED){
            // report where the code point sits in the text; val.value is lookup data,
            // not a position
            throw new ParseException("A prohibited code point was found in the input " +
                                     iter.getText(), chStart);
        }

        direction = UCharacter.getDirection(ch);
        if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == UCharacterDirection.LEFT_TO_RIGHT){
            leftToRight = true;
            ltrPos = chStart;
        }
        if(direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            rightToLeft = true;
            rtlPos = chStart;
        }
    }

    if(checkBiDi){
        // after the loop, direction holds the direction of the last code point
        boolean firstIsRtl = firstCharDir == UCharacterDirection.RIGHT_TO_LEFT
                || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
        boolean lastIsRtl = direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;

        // satisfy 2: right-to-left text must not contain left-to-right code points
        // satisfy 3: right-to-left text must both start and end with a right-to-left code point
        if(rightToLeft && (leftToRight || !(firstIsRtl && lastIsRtl))){
            throw new ParseException("The input does not conform to the rules for BiDi code points. " +
                                     iter.getText(),
                                     Math.max(rtlPos, ltrPos));
        }
    }
    return normOut;
}
 
Example 19
Source File: StringPrep.java    From TencentKona-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs the mapping step over the input: every code point is looked up and then either
 * copied through, replaced by its mapping, or dropped.
 *
 * @param iter     iterator over the source text; read until it reports DONE
 * @param options  bit set of options; unless ALLOW_UNASSIGNED is set, an unassigned
 *                 code point is rejected
 * @return a new StringBuffer holding the mapped text
 * @throws ParseException if an unassigned code point is found and not allowed
 */
private StringBuffer map( UCharacterIterator iter, int options)
                        throws ParseException {

    final boolean allowUnassigned = (options & ALLOW_UNASSIGNED) > 0;
    final Values lookup = new Values();
    final StringBuffer mapped = new StringBuffer();

    for(int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()){

        getValues(getCodePointValue(cp), lookup);

        // reject unassigned code points unless the caller opted in
        if(lookup.type == UNASSIGNED && !allowUnassigned){
            // NOTE(review): getIndex() is read after nextCodePoint() consumed the code
            // point, so this offset presumably points just past it - confirm intended
            throw new ParseException("An unassigned code point was found in the input " +
                                     iter.getText(), iter.getIndex());
        }

        if(lookup.type == MAP){
            if(lookup.isIndex){
                // the value is an offset into mappingData; the range it falls in
                // decides how many chars the mapping has
                final int start = lookup.value;
                int offset = start;
                final int count;

                if(start >= indexes[ONE_UCHAR_MAPPING_INDEX_START] &&
                        start < indexes[TWO_UCHARS_MAPPING_INDEX_START]){
                    count = 1;
                }else if(start >= indexes[TWO_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[THREE_UCHARS_MAPPING_INDEX_START]){
                    count = 2;
                }else if(start >= indexes[THREE_UCHARS_MAPPING_INDEX_START] &&
                        start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]){
                    count = 3;
                }else{
                    // length is stored in the table itself; the mapping follows it
                    count = mappingData[start];
                    offset = start + 1;
                }

                /* copy mapping to destination */
                mapped.append(mappingData, offset, count);
                continue;
            }
            // not an index: the value is subtracted from the code point
            cp -= lookup.value;
        }else if(lookup.type == DELETE){
            // just consume the code point and continue
            continue;
        }

        // copy the (possibly adjusted) code point into the destination
        UTF16.append(mapped, cp);
    }

    return mapped;
}
 
Example 20
Source File: StringPrep.java    From jdk8u-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Prepares the input for use in applications with the given profile. The input is mapped,
 * normalized (NFKC) when the profile requests it, and then checked for prohibited and
 * BiDi code points in the order defined by RFC 3454, depending on the options specified
 * in the profile.
 *
 * @param src           A UCharacterIterator object containing the source string
 * @param options       A bit set of options:
 *
 *  - StringPrep.NONE               Prohibit processing of unassigned code points in the input
 *
 *  - StringPrep.ALLOW_UNASSIGNED   Treat the unassigned code points in the input
 *                                  as normal Unicode code points.
 *
 * @return StringBuffer A StringBuffer containing the output
 * @throws ParseException if a prohibited code point is found (the error offset is the
 *                        index at which that code point starts in the mapped and
 *                        normalized text), or if BiDi checking is enabled and the text
 *                        violates the BiDi rules (the error offset is the start index
 *                        of the last left-to-right or right-to-left code point seen)
 * @draft ICU 2.8
 */
public StringBuffer prepare(UCharacterIterator src, int options)
                    throws ParseException{

    // map
    StringBuffer mapOut = map(src,options);
    // normalize only when the profile asks for it
    StringBuffer normOut = doNFKC ? normalize(mapOut) : mapOut;

    UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
    Values val = new Values();
    // CHAR_DIRECTION_COUNT doubles as the "no code point seen yet" marker
    int direction = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
    int rtlPos = -1, ltrPos = -1;
    boolean rightToLeft = false, leftToRight = false;

    int ch;
    while((ch = iter.nextCodePoint()) != UCharacterIterator.DONE){
        // nextCodePoint() has already moved past ch; step back by its width in chars
        // so that supplementary code points report their first char, not their second
        int chStart = iter.getIndex() - Character.charCount(ch);

        getValues(getCodePointValue(ch), val);
        if(val.type == PROHIBITED){
            // report where the code point sits in the text; val.value is lookup data,
            // not a position
            throw new ParseException("A prohibited code point was found in the input " +
                                     iter.getText(), chStart);
        }

        direction = UCharacter.getDirection(ch);
        if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == UCharacterDirection.LEFT_TO_RIGHT){
            leftToRight = true;
            ltrPos = chStart;
        }
        if(direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
            rightToLeft = true;
            rtlPos = chStart;
        }
    }

    if(checkBiDi){
        // after the loop, direction holds the direction of the last code point
        boolean firstIsRtl = firstCharDir == UCharacterDirection.RIGHT_TO_LEFT
                || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
        boolean lastIsRtl = direction == UCharacterDirection.RIGHT_TO_LEFT
                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;

        // satisfy 2: right-to-left text must not contain left-to-right code points
        // satisfy 3: right-to-left text must both start and end with a right-to-left code point
        if(rightToLeft && (leftToRight || !(firstIsRtl && lastIsRtl))){
            throw new ParseException("The input does not conform to the rules for BiDi code points. " +
                                     iter.getText(),
                                     Math.max(rtlPos, ltrPos));
        }
    }
    return normOut;
}