org.jsoup.helper.StringUtil Java Examples

Source File: RuleDatabaseService.java From WeEvent with Apache License 2.0

6 votes

/**
 * Queries the rule databases that match the given probe entity (after forcing
 * its system tag to {@code false}) and, for each result that has a non-blank
 * optional parameter, appends {@code "?" + optionalParameter} to its database URL.
 *
 * @param request            the current HTTP request; not read by this method
 * @param ruleDatabaseEntity probe entity used for the query-by-example lookup;
 *                           its system tag is overwritten with {@code false}
 * @return the matching entities with their database URLs adjusted in place
 * @throws GovernanceException if anything fails during lookup or URL adjustment
 */
public List<RuleDatabaseEntity> getRuleDataBaseList(HttpServletRequest request, RuleDatabaseEntity ruleDatabaseEntity) throws GovernanceException {
    try {
        ruleDatabaseEntity.setSystemTag(false);
        List<RuleDatabaseEntity> matches = ruleDatabaseRepository.findAll(Example.of(ruleDatabaseEntity));
        for (RuleDatabaseEntity match : matches) {
            String baseUrl = match.getDatabaseUrl();
            String extra = match.getOptionalParameter();
            // The URL is written back even when nothing is appended, as before.
            match.setDatabaseUrl(StringUtil.isBlank(extra) ? baseUrl : baseUrl + "?" + extra);
        }
        return matches;
    } catch (Exception e) {
        log.error("get ruleDatabaseList fail", e);
        throw new GovernanceException("get ruleDatabaseList fail", e);
    }

}

Source File: RuleEngineService.java From WeEvent with Apache License 2.0

6 votes

/**
 * Verifies that the rule carries the fields needed to start it: rule name,
 * user id, broker id and broker URL, checked in that order. The first missing
 * one is logged and reported; if all are present the rule is passed on to
 * {@code checkField}.
 *
 * @param rule the rule to validate
 * @throws GovernanceException if a required field is missing
 */
private void checkStartRuleRequired(RuleEngineEntity rule) throws GovernanceException {
    String failure = null;
    if (StringUtil.isBlank(rule.getRuleName())) {
        failure = "the ruleName is empty";
    } else if (rule.getUserId() == null) {
        failure = "the userId is empty";
    } else if (rule.getBrokerId() == null) {
        failure = "the brokerId is empty";
    } else if (StringUtil.isBlank(rule.getBrokerUrl())) {
        failure = "the brokerUrl is empty";
    }

    if (failure != null) {
        log.error(failure);
        throw new GovernanceException(failure);
    }
    this.checkField(rule);
}

Source File: HtmlParserTest.java From astor with GNU General Public License v2.0

6 votes

@Test public void handlesInvalidDoctypes() {
    // An empty doctype used to fail with an invalid name exception.
    String emptyDoctype = StringUtil.normaliseWhitespace(Jsoup.parse("<!DOCTYPE>").outerHtml());
    assertEquals("<!doctype> <html> <head></head> <body></body> </html>", emptyDoctype);

    // Empty doctype followed by real content.
    String withContent = StringUtil.normaliseWhitespace(
            Jsoup.parse("<!DOCTYPE><html><p>Foo</p></html>").outerHtml());
    assertEquals("<!doctype> <html> <head></head> <body> <p>Foo</p> </body> </html>", withContent);

    // Doctype whose name is a NUL character.
    String nulName = StringUtil.normaliseWhitespace(Jsoup.parse("<!DOCTYPE \u0000>").outerHtml());
    assertEquals("<!doctype �> <html> <head></head> <body></body> </html>", nulName);
}

Source File: Element.java From astor with GNU General Public License v2.0

6 votes

/**
 * Build a CSS selector that addresses this element.
 * <p>
 * An element with an ID yields {@code #id}. Otherwise the result is the
 * parent's selector (when there is a non-Document parent), then {@literal '>'},
 * then this element's own step: {@code tag.class.class}, with
 * {@code :nth-child(n)} added when that step matches more than one element
 * under the parent.
 * </p>
 *
 * @return the CSS Path that can be used to retrieve the element in a selector.
 */
public String cssSelector() {
    String ownId = id();
    if (!ownId.isEmpty())
        return "#" + ownId;

    // HTML namespaces are written ns:tag, CSS expects ns|tag
    StringBuilder ownStep = new StringBuilder(tagName().replace(':', '|'));
    String dottedClasses = StringUtil.join(classNames(), ".");
    if (!dottedClasses.isEmpty())
        ownStep.append('.').append(dottedClasses);

    Element container = parent();
    // the Document is never part of the selector, as there will always be a html node
    if (container == null || container instanceof Document)
        return ownStep.toString();

    String childStep = " > " + ownStep;
    if (container.select(childStep).size() > 1)
        childStep += String.format(":nth-child(%d)", elementSiblingIndex() + 1);

    return container.cssSelector() + childStep;
}

Source File: AbstractHACCommunicationManager.java From hybris-commerce-eclipse-plugin with Apache License 2.0

6 votes

/**
 * Sends an HTTP GET request to the endpoint URL and stores the CSRF token
 * extracted from the response body in {@link #csrfToken}.
 *
 * @throws IOException
 *             if executing the request or handling the response fails. An
 *             {@link UnknownHostException} whose message equals the host
 *             captured from the endpoint URL is re-thrown with a formatted
 *             message and the original exception attached as its cause;
 *             any other {@link UnknownHostException} is re-thrown unchanged
 * @throws ClientProtocolException
 *             propagated from the HTTP client
 * @throws AuthenticationException
 *             if the token extracted from the response is blank
 */
protected void fetchCsrfTokenFromHac() throws ClientProtocolException, IOException, AuthenticationException {
	final HttpGet getRequest = new HttpGet(getEndpointUrl());

	try {
		final HttpResponse response = httpClient.execute(getRequest, getContext());
		final String responseString = new BasicResponseHandler().handleResponse(response);
		csrfToken = getCsrfToken(responseString);

		if (StringUtil.isBlank(csrfToken)) {
			throw new AuthenticationException(ErrorMessage.CSRF_TOKEN_CANNOT_BE_OBTAINED);
		}
	} catch (UnknownHostException error) {
		final String errorMessage = error.getMessage();
		final Matcher matcher = HACPreferenceConstants.HOST_REGEXP_PATTERN.matcher(getEndpointUrl());

		if (matcher.find() && matcher.group(1).equals(errorMessage)) {
			final UnknownHostException described = new UnknownHostException(
					String.format(ErrorMessage.UNKNOWN_HOST_EXCEPTION_MESSAGE_FORMAT, matcher.group(1)));
			// UnknownHostException has no cause-taking constructor; attach the
			// original so its stack trace is not lost.
			described.initCause(error);
			throw described;
		}
		throw error;
	}
}

Source File: Tokeniser.java From astor with GNU General Public License v2.0

6 votes

/**
 * Drains the reader, copying its content while replacing each character
 * reference that can be consumed with the code point(s) it stands for.
 * An ampersand that does not start a consumable reference is kept as-is.
 * @param inAttribute passed through to the character reference consumer
 * @return unescaped string from reader
 */
String unescapeEntities(boolean inAttribute) {
    StringBuilder out = StringUtil.stringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;

        reader.consume();
        int[] codePoints = consumeCharacterReference(null, inAttribute);
        if (codePoints == null || codePoints.length == 0) {
            // not a reference after all: keep the literal ampersand
            out.append('&');
            continue;
        }

        out.appendCodePoint(codePoints[0]);
        if (codePoints.length == 2)
            out.appendCodePoint(codePoints[1]);
    }
    return out.toString();
}

Source File: RuleEngineService.java From WeEvent with Apache License 2.0

6 votes

/**
 * Resolves the rule's database reference and stores the resulting connection
 * URL and database type on the rule.
 * <p>
 * The URL is built as
 * {@code databaseUrl?user=...&password=...&tableName=...[&optionalParameter]}.
 * Nothing happens when the rule has no database id or the id cannot be resolved.
 * </p>
 *
 * @param rule the rule to update in place
 */
private void setRuleDataBaseUrl(RuleEngineEntity rule) {
    if (rule.getRuleDataBaseId() == null) {
        return;
    }
    RuleDatabaseEntity ruleDataBase = ruleDatabaseRepository.findById(rule.getRuleDataBaseId());
    if (ruleDataBase != null) {
        // NOTE(review): user name and password are concatenated without URL-encoding;
        // confirm the consumer of this URL does not require encoded values.
        String dbUrl = ruleDataBase.getDatabaseUrl() + "?user=" + ruleDataBase.getUsername() + "&password=" + ruleDataBase.getPassword() +
                "&tableName=" + rule.getTableName();
        if (!StringUtil.isBlank(ruleDataBase.getOptionalParameter())) {
            dbUrl = dbUrl + "&" + ruleDataBase.getOptionalParameter();
        }
        rule.setDatabaseUrl(dbUrl);
        rule.setDatabaseType(ruleDataBase.getDatabaseType());
        // Log only the base URL: the assembled URL embeds the database password
        // and must not be written to the log.
        log.info("dataBaseUrl:{}", ruleDataBase.getDatabaseUrl());
    }
}

Source File: HtmlToPlainText.java From eclipse.jdt.ls with Eclipse Public License 2.0

6 votes

/**
 * Called when a node is first visited: emits text node content and the
 * leading formatting for list items, definition terms and block elements,
 * and tracks how deeply {@code ul} lists are nested.
 */
@Override
public void head(Node node, int depth) {
	String name = node.nodeName();

	if (node instanceof TextNode) {
		// TextNodes carry all user-readable text in the DOM.
		append(((TextNode) node).text());
		return;
	}
	if (name.equals("ul")) {
		listNesting++;
		return;
	}
	if (name.equals("li")) {
		append("\n ");
		// one extra indent step per nesting level beyond the first
		for (int level = listNesting; level > 1; level--) {
			append("  ");
		}
		append(listNesting == 1 ? "* " : "- ");
		return;
	}
	if (name.equals("dt")) {
		append("  ");
		return;
	}
	if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
		append("\n");
	}
}

Source File: RestAPINetworkManager.java From Shaarlier with GNU General Public License v3.0

5 votes

/**
 * Searches the server for the link's URL and returns a copy of the link,
 * filled with the first search result's data when there is one.
 * A response that cannot be parsed as JSON is logged and leaves the copy
 * with whatever fields were set before the failure.
 */
@Override
public Link prefetchLinkData(Link link) throws IOException {
    // TODO: There might be some bugs here, e.g:
    // - If the scheme used is not the same that on the saved link
    // - If there are tracking tags that don't match
    // We might want to open an Issue on Shaarli to get feedback
    String endpoint = new URL(this.mAccount.getUrlShaarli() + LINK_URL).toExternalForm();
    String rawResponse = this.newConnection(endpoint, Connection.Method.GET)
            .data("offset", "0")
            .data("limit", "1")
            .data("searchterm", link.getUrl())
            .execute()
            .body();
    Log.d("RestAPI:prefetch", rawResponse);

    Link result = new Link(link);
    try {
        JSONArray found = new JSONArray(rawResponse);
        if (found.length() >= 1) {
            Log.i("RestAPI:prefetch", "Found 1 link result (not new link)");
            JSONObject existing = found.getJSONObject(0);
            result.setUrl(existing.getString("url"));
            result.setTitle(existing.getString("title"));
            result.setDescription(existing.getString("description"));
            result.setPrivate(existing.getBoolean("private"));

            JSONArray tagArray = existing.getJSONArray("tags");
            ArrayList<String> tagNames = new ArrayList<>();
            for (int idx = 0; idx < tagArray.length(); idx++) {
                tagNames.add(tagArray.getString(idx));
            }
            result.setTags(StringUtil.join(tagNames, ", "));
        } else {
            Log.i("RestAPI:prefetch", "New link");
        }
    } catch (JSONException e) {
        Log.e("RestAPI:prefetch", e.toString());
    }
    return result;
}

Source File: UrlConnectTest.java From astor with GNU General Public License v2.0

5 votes

@Test
public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException {
    // an explicitly requested html parser must win over xml auto-detection
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml").parser(Parser.htmlParser());
    Document doc = con.get();

    Connection.Request sentRequest = con.request();
    assertTrue(sentRequest.parser().getTreeBuilder() instanceof HtmlTreeBuilder);

    String normalised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>", normalised);
}

Source File: HtmlParserTest.java From astor with GNU General Public License v2.0

5 votes

@Test public void doesNotFindShortestMatchingEntity() {
    // Earlier behaviour: spot a possible entity, then shorten the candidate until some entity name matched
    // (as defined in html5). In practice that produced matches the author never intended.
    Document doc = Jsoup.parse("One &clubsuite; &clubsuit;");
    String expected = StringUtil.normaliseWhitespace("One &amp;clubsuite; ♣");
    assertEquals(expected, doc.body().html());
}

Source File: XmlTreeBuilderTest.java From astor with GNU General Public License v2.0

5 votes

@Test public void handlesXmlDeclarationAsDeclaration() {
    Document doc = Jsoup.parse("<?xml encoding='UTF-8' ?><body>One</body><!-- comment -->", "", Parser.xmlParser());

    String normalised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<?xml encoding='UTF-8' ?> <body> One </body> <!-- comment -->", normalised);

    // first child is the declaration, third child is the comment
    assertEquals("#declaration", doc.childNode(0).nodeName());
    assertEquals("#comment", doc.childNode(2).nodeName());
}

Source File: HtmlToPlainText.java From astor with GNU General Public License v2.0

5 votes

// Adds text to the accumulator, wrapping at whitespace when the current line would exceed maxWidth.
private void append(String text) {
    if (text.startsWith("\n"))
        width = 0; // a leading newline restarts the line; these come only from the formats above, not natural text

    boolean singleSpace = text.equals(" ");
    if (singleSpace &&
            (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n")))
        return; // skip: would only extend a run of empty spaces

    if (text.length() + width <= maxWidth) { // fits on the current line as is
        accum.append(text);
        width += text.length();
        return;
    }

    // does not fit: place it word by word, breaking the line where needed
    String[] pieces = text.split("\\s+");
    int lastIndex = pieces.length - 1;
    for (int idx = 0; idx <= lastIndex; idx++) {
        // every word except the last gets a trailing space
        String piece = idx < lastIndex ? pieces[idx] + " " : pieces[idx];
        boolean overflows = piece.length() + width > maxWidth;
        if (overflows)
            accum.append("\n");
        accum.append(piece);
        width = overflows ? piece.length() : width + piece.length();
    }
}

Source File: AbstractHACCommunicationManager.java From hybris-commerce-eclipse-plugin with Apache License 2.0

5 votes

/**
 * Parses the response body as HTML and reads the CSRF token from the
 * attribute {@code Meta.CSRF_META_TAG_CONTENT} of the elements selected by
 * {@code Meta.CSRF_META_TAG}.
 *
 * @param responseBody
 *            response body of GET method
 * @return csrf token
 * @throws AuthenticationException
 *             if the response body is blank
 */
protected String getCsrfToken(String responseBody) throws AuthenticationException {
	if (!StringUtil.isBlank(responseBody)) {
		return Jsoup.parse(responseBody).select(Meta.CSRF_META_TAG).attr(Meta.CSRF_META_TAG_CONTENT);
	}
	throw new AuthenticationException(ErrorMessage.CSRF_RESPONSE_CANNOT_BE_BLANK);
}

Source File: HtmlToPlainText.java From intellij-quarkus with Eclipse Public License 2.0

5 votes

/**
 * Called when leaving a node: emits the trailing formatting for the element
 * (line break, cell separator or link target) and unwinds {@code ul} nesting.
 */
@Override
public void tail(Node node, int depth) {
    switch (node.nodeName()) {
        case "br":
        case "dd":
        case "dt":
        case "p":
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
            append("\n");
            break;
        case "th":
        case "td":
            append(" ");
            break;
        case "a":
            append(String.format(" <%s>", node.absUrl("href")));
            break;
        case "ul":
            listNesting--;
            break;
        default:
            break;
    }
}

Source File: DocumentType.java From jsoup-learning with MIT License

5 votes

/**
 * Writes the doctype declaration: the name, then the public and system
 * identifiers when they are not blank.
 */
@Override
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
    accum.append("<!DOCTYPE ");
    accum.append(attr("name"));

    String publicId = attr("publicId");
    if (!StringUtil.isBlank(publicId)) {
        accum.append(" PUBLIC \"").append(publicId).append("\"");
    }

    String systemId = attr("systemId");
    if (!StringUtil.isBlank(systemId)) {
        accum.append(" \"").append(systemId).append("\"");
    }

    accum.append('>');
}

Source File: UrlConnectTest.java From astor with GNU General Public License v2.0

5 votes

@Test
public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException {
    // an explicitly requested html parser must win over xml auto-detection
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml").parser(Parser.htmlParser());
    Document doc = con.get();

    Connection.Request sentRequest = con.request();
    assertTrue(sentRequest.parser().getTreeBuilder() instanceof HtmlTreeBuilder);

    String normalised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>", normalised);
}

Source File: HtmlParserTest.java From astor with GNU General Public License v2.0

5 votes

@Test public void caseSensitiveParseTree() {
    // parse with the case-preserving settings so <X> is not folded to <x>
    Parser caseKeepingParser = Parser.htmlParser();
    caseKeepingParser.settings(ParseSettings.preserveCase);

    Document doc = caseKeepingParser.parseInput("<r><X>A</X><y>B</y></r>", "");
    String normalisedBody = StringUtil.normaliseWhitespace(doc.body().html());
    assertEquals("<r> <X> A </X> <y> B </y> </r>", normalisedBody);
}

Source File: HtmlParserTest.java From astor with GNU General Public License v2.0

5 votes

@Test public void doesNotFindShortestMatchingEntity() {
    // Earlier behaviour: spot a possible entity, then shorten the candidate until some entity name matched
    // (as defined in html5). In practice that produced matches the author never intended.
    Document doc = Jsoup.parse("One &clubsuite; &clubsuit;");
    String expected = StringUtil.normaliseWhitespace("One &amp;clubsuite; ♣");
    assertEquals(expected, doc.body().html());
}

Source File: HtmlParserTest.java From astor with GNU General Public License v2.0

5 votes

@Test public void handleNullContextInParseFragment() {
    List<Node> nodes = Parser.parseFragment("<ol><li>One</li></ol><p>Two</p>", null, "http://example.com/");

    // with no context a doc gets created, and what comes back is its <html> node (not the document)
    assertEquals(1, nodes.size());
    Node root = nodes.get(0);
    assertEquals("html", root.nodeName());

    String normalised = StringUtil.normaliseWhitespace(root.outerHtml());
    assertEquals("<html> <head></head> <body> <ol> <li>One</li> </ol> <p>Two</p> </body> </html>", normalised);
}

Source File: HtmlTreeBuilder.java From jsoup-learning with MIT License

5 votes

// Walks the stack via its descending iterator: true on the first element named in targetNames,
// false on the first element named in baseTypes or (when given) extraTypes.
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    for (Iterator<Element> walker = stack.descendingIterator(); walker.hasNext(); ) {
        String current = walker.next().nodeName();
        if (StringUtil.in(current, targetNames))
            return true;

        boolean boundary = StringUtil.in(current, baseTypes)
                || (extraTypes != null && StringUtil.in(current, extraTypes));
        if (boundary)
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}

Source File: UrlConnectTest.java From astor with GNU General Public License v2.0

5 votes

@Test
public void fetchHandlesXml() throws IOException {
    // no parser is set here, so xml should be auto-detected and the XML parser used
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml");
    Document doc = con.get();

    Connection.Request sentRequest = con.request();
    assertTrue(sentRequest.parser().getTreeBuilder() instanceof XmlTreeBuilder);

    String normalised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<xml> <link> one </link> <table> Two </table> </xml>", normalised);
}

Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0

5 votes

// Removes elements from the end of the stack, stopping once an element named in elNames has been removed.
void popStackToClose(String... elNames) {
    int pos = stack.size();
    while (--pos >= 0) {
        Element popped = stack.remove(pos);
        if (StringUtil.in(popped.nodeName(), elNames))
            return;
    }
}

Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0

5 votes

// Removes elements from the end of the stack until one named in nodeNames, or "html", is at the end;
// that element itself stays on the stack.
private void clearStackToContext(String... nodeNames) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        String candidate = stack.get(pos).nodeName();
        if (StringUtil.in(candidate, nodeNames) || candidate.equals("html"))
            return;
        stack.remove(pos);
        pos--;
    }
}

Source File: HtmlParserTest.java From astor with GNU General Public License v2.0

5 votes

@Test public void selfClosingOnNonvoidIsError() {
    String html = "<p>test</p><div /><div>Two</div>";

    // the self-closing <div /> must be recorded as the only parse error
    Parser trackingParser = Parser.htmlParser().setTrackErrors(5);
    trackingParser.parseInput(html, "");
    assertEquals(1, trackingParser.getErrors().size());
    String firstError = trackingParser.getErrors().get(0).toString();
    assertEquals("18: Tag cannot be self closing; not a void tag", firstError);

    // the same input is not valid, and cleaning turns the self-closing div into an empty div
    assertFalse(Jsoup.isValid(html, Whitelist.relaxed()));
    String cleaned = StringUtil.normaliseWhitespace(Jsoup.clean(html, Whitelist.relaxed()));
    assertEquals("<p>test</p> <div></div> <div> Two </div>", cleaned);
}

Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0

5 votes

// Scans the stack from its end: true when targetName is found; false as soon as an element
// outside TagSearchSelectScope is met first.
boolean inSelectScope(String targetName) {
    int pos = stack.size();
    while (--pos >= 0) {
        String current = stack.get(pos).nodeName();
        if (current.equals(targetName))
            return true;
        boolean stillInScope = StringUtil.in(current, TagSearchSelectScope);
        if (!stillInScope) // every other element ends the search
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}

Source File: HtmlToPlainText.java From astor with GNU General Public License v2.0

5 votes

// Called when a node is first visited: emits text content, or the leading formatting for the element.
public void head(Node node, int depth) {
    String name = node.nodeName();

    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (name.equals("li")) {
        append("\n * ");
        return;
    }
    if (name.equals("dt")) {
        append("  ");
        return;
    }
    if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}

Source File: Element.java From astor with GNU General Public License v2.0

5 votes

// Appends the text node's whole text: verbatim when the parent preserves whitespace,
// otherwise with whitespace normalised.
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    String wholeText = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode)) {
        StringUtil.appendNormalisedWhitespace(accum, wholeText, TextNode.lastCharIsWhitespace(accum));
        return;
    }
    accum.append(wholeText);
}

Source File: Element.java From astor with GNU General Public License v2.0

5 votes

// Appends the text node's whole text: verbatim when the parent preserves whitespace,
// otherwise with whitespace normalised.
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    String wholeText = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode)) {
        StringUtil.appendNormalisedWhitespace(accum, wholeText, TextNode.lastCharIsWhitespace(accum));
        return;
    }
    accum.append(wholeText);
}

Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0

5 votes

// Scans the stack from its end: true on the first element named in targetNames,
// false on the first element named in baseTypes or (when given) extraTypes.
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    int pos = stack.size();
    while (--pos >= 0) {
        String current = stack.get(pos).nodeName();
        if (StringUtil.in(current, targetNames))
            return true;

        boolean boundary = StringUtil.in(current, baseTypes)
                || (extraTypes != null && StringUtil.in(current, extraTypes));
        if (boundary)
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}

org.jsoup.helper.StringUtil Java Examples