sun.text.Normalizer Java Examples
The following examples show how to use
sun.text.Normalizer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConditionalSpecialCasing.java From hottub with GNU General Public License v2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #2
Source File: StringPrep.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/**
 * Returns a new buffer holding the NFKC normalization of {@code src}.
 *
 * Two option bits are combined and passed to the normalizer:
 * {@code Normalizer.UNICODE_3_2} and {@code NormalizerImpl.BEFORE_PRI_29}.
 *
 * Background on the BEFORE_PRI_29 option: IDNA, as interpreted by IETF
 * members (see the Unicode mailing list, 2004H1), requires strict adherence
 * to Unicode 3.2 normalization, including the buggy composition behavior
 * that predates the fix for Public Review Issue #29. As a consequence some
 * valid but nonsensical text is either corrupted or rejected, depending on
 * the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize; it is read via {@code toString()}
 * @return a freshly allocated {@code StringBuffer} with the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String text = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;

    return new StringBuffer(
        Normalizer.normalize(text, java.text.Normalizer.Form.NFKC, options));
}
Example #3
Source File: ConditionalSpecialCasing.java From jdk8u_jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #4
Source File: ConditionalSpecialCasing.java From jdk-1.7-annotated with Apache License 2.0 | 6 votes |
/** * Implements the "After_Soft_Dotted" condition * * Specification: The last preceding character with combining class * of zero before C was Soft_Dotted, and there is no intervening * combining character class 230 (ABOVE). * * Regular Expression: * Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterSoftDotted(String src, int index) { int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #5
Source File: ConditionalSpecialCasing.java From jdk-1.7-annotated with Apache License 2.0 | 6 votes |
/** * Implements the "More_Above" condition * * Specification: C is followed by one or more characters of combining * class 230 (ABOVE) in the combining character sequence. * * Regular Expression: * After C: [{cc!=0}]*[{cc==230}] */ private static boolean isMoreAbove(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following ABOVE combining class character for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); cc = Normalizer.getCombiningClass(ch); if (cc == COMBINING_CLASS_ABOVE) { return true; } else if (cc == 0) { return false; } } return false; }
Example #6
Source File: ConditionalSpecialCasing.java From jdk-1.7-annotated with Apache License 2.0 | 6 votes |
/** * Implements the "After_I" condition * * Specification: The last preceding base character was an uppercase I, * and there is no intervening combining character class 230 (ABOVE). * * Regular Expression: * Before C: [I]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterI(String src, int index) { int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #7
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "More_Above" condition * * Specification: C is followed by one or more characters of combining * class 230 (ABOVE) in the combining character sequence. * * Regular Expression: * After C: [{cc!=0}]*[{cc==230}] */ private static boolean isMoreAbove(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following ABOVE combining class character for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); cc = Normalizer.getCombiningClass(ch); if (cc == COMBINING_CLASS_ABOVE) { return true; } else if (cc == 0) { return false; } } return false; }
Example #8
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_Soft_Dotted" condition * * Specification: The last preceding character with combining class * of zero before C was Soft_Dotted, and there is no intervening * combining character class 230 (ABOVE). * * Regular Expression: * Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterSoftDotted(String src, int index) { int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #9
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_I" condition * * Specification: The last preceding base character was an uppercase I, * and there is no intervening combining character class 230 (ABOVE). * * Regular Expression: * Before C: [I]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterI(String src, int index) { int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #10
Source File: StringPrep.java From jdk8u_jdk with GNU General Public License v2.0 | 6 votes |
/**
 * Returns a new buffer holding the NFKC normalization of {@code src}.
 *
 * Two option bits are combined and passed to the normalizer:
 * {@code Normalizer.UNICODE_3_2} and {@code NormalizerImpl.BEFORE_PRI_29}.
 *
 * Background on the BEFORE_PRI_29 option: IDNA, as interpreted by IETF
 * members (see the Unicode mailing list, 2004H1), requires strict adherence
 * to Unicode 3.2 normalization, including the buggy composition behavior
 * that predates the fix for Public Review Issue #29. As a consequence some
 * valid but nonsensical text is either corrupted or rejected, depending on
 * the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize; it is read via {@code toString()}
 * @return a freshly allocated {@code StringBuffer} with the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String text = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;

    return new StringBuffer(
        Normalizer.normalize(text, java.text.Normalizer.Form.NFKC, options));
}
Example #11
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #12
Source File: StringPrep.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 6 votes |
/**
 * Returns a new buffer holding the NFKC normalization of {@code src}.
 *
 * Two option bits are combined and passed to the normalizer:
 * {@code Normalizer.UNICODE_3_2} and {@code NormalizerImpl.BEFORE_PRI_29}.
 *
 * Background on the BEFORE_PRI_29 option: IDNA, as interpreted by IETF
 * members (see the Unicode mailing list, 2004H1), requires strict adherence
 * to Unicode 3.2 normalization, including the buggy composition behavior
 * that predates the fix for Public Review Issue #29. As a consequence some
 * valid but nonsensical text is either corrupted or rejected, depending on
 * the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize; it is read via {@code toString()}
 * @return a freshly allocated {@code StringBuffer} with the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String text = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;

    return new StringBuffer(
        Normalizer.normalize(text, java.text.Normalizer.Form.NFKC, options));
}
Example #13
Source File: ConditionalSpecialCasing.java From openjdk-8-source with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_Soft_Dotted" condition * * Specification: The last preceding character with combining class * of zero before C was Soft_Dotted, and there is no intervening * combining character class 230 (ABOVE). * * Regular Expression: * Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterSoftDotted(String src, int index) { int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #14
Source File: ConditionalSpecialCasing.java From openjdk-8-source with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_I" condition * * Specification: The last preceding base character was an uppercase I, * and there is no intervening combining character class 230 (ABOVE). * * Regular Expression: * Before C: [I]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterI(String src, int index) { int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #15
Source File: StringPrep.java From hottub with GNU General Public License v2.0 | 6 votes |
/**
 * Returns a new buffer holding the NFKC normalization of {@code src}.
 *
 * Two option bits are combined and passed to the normalizer:
 * {@code Normalizer.UNICODE_3_2} and {@code NormalizerImpl.BEFORE_PRI_29}.
 *
 * Background on the BEFORE_PRI_29 option: IDNA, as interpreted by IETF
 * members (see the Unicode mailing list, 2004H1), requires strict adherence
 * to Unicode 3.2 normalization, including the buggy composition behavior
 * that predates the fix for Public Review Issue #29. As a consequence some
 * valid but nonsensical text is either corrupted or rejected, depending on
 * the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize; it is read via {@code toString()}
 * @return a freshly allocated {@code StringBuffer} with the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String text = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;

    return new StringBuffer(
        Normalizer.normalize(text, java.text.Normalizer.Form.NFKC, options));
}
Example #16
Source File: ConditionalSpecialCasing.java From openjdk-8-source with GNU General Public License v2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #17
Source File: ConditionalSpecialCasing.java From hottub with GNU General Public License v2.0 | 6 votes |
/** * Implements the "More_Above" condition * * Specification: C is followed by one or more characters of combining * class 230 (ABOVE) in the combining character sequence. * * Regular Expression: * After C: [{cc!=0}]*[{cc==230}] */ private static boolean isMoreAbove(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following ABOVE combining class character for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); cc = Normalizer.getCombiningClass(ch); if (cc == COMBINING_CLASS_ABOVE) { return true; } else if (cc == 0) { return false; } } return false; }
Example #18
Source File: ConditionalSpecialCasing.java From hottub with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_Soft_Dotted" condition * * Specification: The last preceding character with combining class * of zero before C was Soft_Dotted, and there is no intervening * combining character class 230 (ABOVE). * * Regular Expression: * Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterSoftDotted(String src, int index) { int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #19
Source File: ConditionalSpecialCasing.java From hottub with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_I" condition * * Specification: The last preceding base character was an uppercase I, * and there is no intervening combining character class 230 (ABOVE). * * Regular Expression: * Before C: [I]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterI(String src, int index) { int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #20
Source File: ConditionalSpecialCasing.java From Java8CN with Apache License 2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #21
Source File: ConditionalSpecialCasing.java From Java8CN with Apache License 2.0 | 6 votes |
/** * Implements the "More_Above" condition * * Specification: C is followed by one or more characters of combining * class 230 (ABOVE) in the combining character sequence. * * Regular Expression: * After C: [{cc!=0}]*[{cc==230}] */ private static boolean isMoreAbove(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following ABOVE combining class character for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); cc = Normalizer.getCombiningClass(ch); if (cc == COMBINING_CLASS_ABOVE) { return true; } else if (cc == 0) { return false; } } return false; }
Example #22
Source File: ConditionalSpecialCasing.java From Java8CN with Apache License 2.0 | 6 votes |
/** * Implements the "After_Soft_Dotted" condition * * Specification: The last preceding character with combining class * of zero before C was Soft_Dotted, and there is no intervening * combining character class 230 (ABOVE). * * Regular Expression: * Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterSoftDotted(String src, int index) { int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #23
Source File: ConditionalSpecialCasing.java From Java8CN with Apache License 2.0 | 6 votes |
/** * Implements the "After_I" condition * * Specification: The last preceding base character was an uppercase I, * and there is no intervening combining character class 230 (ABOVE). * * Regular Expression: * Before C: [I]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterI(String src, int index) { int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #24
Source File: StringPrep.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/**
 * Returns a new buffer holding the NFKC normalization of {@code src}.
 *
 * Two option bits are combined and passed to the normalizer:
 * {@code Normalizer.UNICODE_3_2} and {@code NormalizerImpl.BEFORE_PRI_29}.
 *
 * Background on the BEFORE_PRI_29 option: IDNA, as interpreted by IETF
 * members (see the Unicode mailing list, 2004H1), requires strict adherence
 * to Unicode 3.2 normalization, including the buggy composition behavior
 * that predates the fix for Public Review Issue #29. As a consequence some
 * valid but nonsensical text is either corrupted or rejected, depending on
 * the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize; it is read via {@code toString()}
 * @return a freshly allocated {@code StringBuffer} with the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String text = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;

    return new StringBuffer(
        Normalizer.normalize(text, java.text.Normalizer.Form.NFKC, options));
}
Example #25
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #26
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "More_Above" condition * * Specification: C is followed by one or more characters of combining * class 230 (ABOVE) in the combining character sequence. * * Regular Expression: * After C: [{cc!=0}]*[{cc==230}] */ private static boolean isMoreAbove(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following ABOVE combining class character for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); cc = Normalizer.getCombiningClass(ch); if (cc == COMBINING_CLASS_ABOVE) { return true; } else if (cc == 0) { return false; } } return false; }
Example #27
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_Soft_Dotted" condition * * Specification: The last preceding character with combining class * of zero before C was Soft_Dotted, and there is no intervening * combining character class 230 (ABOVE). * * Regular Expression: * Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterSoftDotted(String src, int index) { int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #28
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
/** * Implements the "After_I" condition * * Specification: The last preceding base character was an uppercase I, * and there is no intervening combining character class 230 (ABOVE). * * Regular Expression: * Before C: [I]([{cc!=230}&{cc!=0}])* */ private static boolean isAfterI(String src, int index) { int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }
Example #29
Source File: StringPrep.java From openjdk-jdk9 with GNU General Public License v2.0 | 6 votes |
/**
 * Returns a new buffer holding the NFKC normalization of {@code src}.
 *
 * The only option passed to the normalizer in this variant is
 * {@code Normalizer.UNICODE_3_2}; no separate pre-PRI-29 option bit is set
 * here.
 *
 * Historical background (option UNORM_BEFORE_PRI_29): IDNA, as interpreted
 * by IETF members (see the Unicode mailing list, 2004H1), requires strict
 * adherence to Unicode 3.2 normalization, including the buggy composition
 * behavior that predates the fix for Public Review Issue #29. As a
 * consequence some valid but nonsensical text is either corrupted or
 * rejected, depending on the text.
 * NOTE(review): whether the pre-PRI-29 behavior is still applied by the
 * normalizer without an explicit flag cannot be determined from this
 * method alone - confirm against the Normalizer implementation.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize; it is read via {@code toString()}
 * @return a freshly allocated {@code StringBuffer} with the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String text = src.toString();
    final int options = Normalizer.UNICODE_3_2;

    return new StringBuffer(
        Normalizer.normalize(text, java.text.Normalizer.Form.NFKC, options));
}
Example #30
Source File: ConditionalSpecialCasing.java From openjdk-jdk9 with GNU General Public License v2.0 | 6 votes |
/** * Implements the "Before_Dot" condition * * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>. * Any sequence of characters with a combining class that is * neither 0 nor 230 may intervene between the current character * and the combining dot above. * * Regular Expression: * After C: ([{cc!=230}&{cc!=0}])*[\u0307] */ private static boolean isBeforeDot(String src, int index) { int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307') { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false; }