org.apache.commons.lang3.text.translate.CharSequenceTranslator Java Examples

Source File: StringEscapeUtilsTest.java From astor with GNU General Public License v2.0

6 votes

@Test
public void testEscapeXmlAllCharacters() {
    // The XML character production at http://www.w3.org/TR/xml/#charsets reads:
    // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
    // excluding the surrogate blocks, FFFE, and FFFF. */
    //
    // ESCAPE_XML is extended with one numeric-entity escaper per range listed below.
    // NOTE(review): the third escaper ends at 0x19 while the production above starts at #x20, so
    // 0x1A-0x1F are not covered by any escaper here -- confirm whether that is intended.
    final CharSequenceTranslator xmlTranslator = StringEscapeUtils.ESCAPE_XML.with(
            NumericEntityEscaper.below(9),
            NumericEntityEscaper.between(0xB, 0xC),
            NumericEntityEscaper.between(0xE, 0x19),
            NumericEntityEscaper.between(0xD800, 0xDFFF),
            NumericEntityEscaper.between(0xFFFE, 0xFFFF),
            NumericEntityEscaper.above(0x110000));

    // Code points 0x0-0x8 become decimal numeric entities.
    assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;",
            xmlTranslator.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));

    // 0x9 (tab) and 0xA (line feed) pass through unchanged.
    assertEquals("\t", xmlTranslator.translate("\t"));
    assertEquals("\n", xmlTranslator.translate("\n"));

    // 0xB and 0xC become numeric entities.
    assertEquals("&#11;&#12;", xmlTranslator.translate("\u000B\u000C"));

    // 0xD (carriage return) passes through unchanged.
    assertEquals("\r", xmlTranslator.translate("\r"));

    // Ordinary text: only the apostrophe is rewritten, as a named entity.
    assertEquals("Hello World! Ain&apos;t this great?", xmlTranslator.translate("Hello World! Ain't this great?"));

    // Samples from both ends of the 0xE-0x19 range become numeric entities.
    assertEquals("&#14;&#15;&#24;&#25;", xmlTranslator.translate("\u000E\u000F\u0018\u0019"));
}

Source File: YamlStringEscapeUtils.java From ratel with Apache License 2.0

4 votes

/**
 * Writes {@code str} to {@code out} one char at a time, replacing selected characters with
 * backslash escapes.
 * <ul>
 * <li>Chars above 0xFFFD, chars strictly between 0xD7FF and 0xE000, chars strictly between 0x7E
 * and 0xA0 other than 0x85, and chars below 32 other than tab, line feed and carriage return are
 * written as a backslash, the letter u, and hex digits.</li>
 * <li>Tab, line feed and carriage return are written as a backslash followed by t, n or r.</li>
 * <li>Double quote and backslash are always preceded by a backslash.</li>
 * <li>Single quote and forward slash are preceded by a backslash only when the matching flag is set.</li>
 * <li>Every other char is written unchanged.</li>
 * </ul>
 *
 * @param out Writer that receives the escaped string, must not be null
 * @param str String to escape values in, may be null (nothing is written in that case)
 * @param escapeSingleQuote escapes single quotes if <code>true</code>
 * @param escapeForwardSlash escapes forward slashes if <code>true</code>
 * @throws IllegalArgumentException if {@code out} is null
 * @throws IOException if an IOException occurs
 */
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
                                          boolean escapeForwardSlash) throws IOException {
    if (out == null) {
        throw new IllegalArgumentException("The Writer must not be null");
    }
    if (str == null) {
        return;
    }

    final int length = str.length();
    for (int index = 0; index < length; index++) {
        final char current = str.charAt(index);

        // Chars kept as-is (besides quotes and slashes handled at the bottom):
        // 0x20-0x7E, 0x85, 0xA0-0xD7FF and 0xE000-0xFFFD.
        final boolean aboveFffd = current > 0xFFFD;
        final boolean inSurrogateBlock = current > 0xD7FF && current < 0xE000;
        if (aboveFffd || inSurrogateBlock) {
            out.write("\\u" + CharSequenceTranslator.hex(current));
            continue;
        }

        // 0x7F-0x9F except 0x85: prefixed with two zeros before the hex() result.
        if (current > 0x7E && current < 0xA0 && current != 0x85) {
            out.write("\\u00" + CharSequenceTranslator.hex(current));
            continue;
        }

        if (current < 32) {
            if (current == '\t') {
                out.write('\\');
                out.write('t');
            } else if (current == '\n') {
                out.write('\\');
                out.write('n');
            } else if (current == '\r') {
                out.write('\\');
                out.write('r');
            } else {
                // Two zeros above 0xF, three otherwise; presumably hex() is unpadded so the
                // result always has four digits -- hex() is defined elsewhere, confirm.
                final String zeroPadding = current > 0xf ? "\\u00" : "\\u000";
                out.write(zeroPadding + CharSequenceTranslator.hex(current));
            }
            continue;
        }

        // Remaining chars are written literally, optionally preceded by a backslash.
        final boolean alwaysEscaped = current == '"' || current == '\\';
        final boolean escapedOnRequest = (current == '\'' && escapeSingleQuote)
                || (current == '/' && escapeForwardSlash);
        if (alwaysEscaped || escapedOnRequest) {
            out.write('\\');
        }
        out.write(current);
    }
}

Source File: CharSequenceTranslatorEvaluator.java From localization_nifi with Apache License 2.0

4 votes

/**
 * Creates an evaluator from a subject evaluator and a translator.
 *
 * @param subject evaluator stored as this instance's {@code subject}
 * @param method translator stored as this instance's {@code method}
 */
public CharSequenceTranslatorEvaluator(final Evaluator<String> subject, CharSequenceTranslator method) {
    this.method = method;
    this.subject = subject;
}

Source File: XssProperties.java From super-cloudops with Apache License 2.0

4 votes

/**
 * Creates a CharTranslator that holds the given translator.
 *
 * @param translator the translator to store; passed to {@code notNullOf} before being stored
 */
private CharTranslator(CharSequenceTranslator translator) {
	// Presumably rejects a null translator; notNullOf is defined outside this view -- confirm how it fails.
	notNullOf(translator, "translator");
	this.translator = translator;
}

Source File: XssProperties.java From super-cloudops with Apache License 2.0

4 votes

/**
 * Returns the value of {@code translator}.
 *
 * @return the current {@code translator} reference
 */
public CharSequenceTranslator getTranslator() {
	return translator;
}

Source File: StringEscapeUtilsTest.java From astor with GNU General Public License v2.0

3 votes

/**
 * Checks that a supplementary character given as a UTF-16 surrogate pair is escaped as a single
 * numeric entity.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &#x233B4; rather than &#xD84C;&#xDFB4;.
 * </blockquote>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    // Code point 0x233B4 (decimal 144308) written as its surrogate pair.
    final String surrogatePair = "\uD84C\uDFB4";
    final String expectedEntity = "&#144308;";

    // ESCAPE_XML extended with a numeric-entity escaper for the range 0x7f up to Integer.MAX_VALUE.
    final CharSequenceTranslator translator =
        StringEscapeUtils.ESCAPE_XML.with(NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));

    final String actual = translator.translate(surrogatePair);
    assertEquals("Supplementary character must be represented using a single escape", expectedEntity, actual);
}

Source File: StringEscapeUtilsTest.java From astor with GNU General Public License v2.0

3 votes

/**
 * Checks that a supplementary character given as a UTF-16 surrogate pair is escaped as a single
 * numeric entity.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &#x233B4; rather than &#xD84C;&#xDFB4;.
 * </blockquote>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    // Escaper for the range 0x7f up to Integer.MAX_VALUE, layered on top of ESCAPE_XML.
    final NumericEntityEscaper fromDelUpwards = NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE);
    final CharSequenceTranslator xmlTranslator = StringEscapeUtils.ESCAPE_XML.with(fromDelUpwards);

    // Input is the surrogate pair for code point 0x233B4, which is 144308 in decimal.
    final String translated = xmlTranslator.translate("\uD84C\uDFB4");

    assertEquals("Supplementary character must be represented using a single escape", "&#144308;", translated);
}

Source File: StringEscapeUtilsTest.java From astor with GNU General Public License v2.0

3 votes

/**
 * Checks that a supplementary character given as a UTF-16 surrogate pair is escaped as a single
 * numeric entity.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &#x233B4; rather than &#xD84C;&#xDFB4;.
 * </blockquote>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    final String failureMessage = "Supplementary character must be represented using a single escape";

    // ESCAPE_XML extended so that the range 0x7f up to Integer.MAX_VALUE is written as numeric entities.
    final CharSequenceTranslator entityTranslator =
        StringEscapeUtils.ESCAPE_XML.with(NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));

    // The two-char surrogate pair for code point 0x233B4 must yield one decimal entity (144308).
    assertEquals(failureMessage, "&#144308;", entityTranslator.translate("\uD84C\uDFB4"));
}

org.apache.commons.lang3.text.translate.CharSequenceTranslator Java Examples