org.jsoup.helper.StringUtil Java Examples
The following examples show how to use
org.jsoup.helper.StringUtil.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RuleDatabaseService.java From WeEvent with Apache License 2.0 | 6 votes |
/**
 * Looks up rule databases by example and rewrites each entry's URL for display.
 * <p>
 * The probe entity has its system tag set to {@code false} before it is used as the
 * query example. For every match whose optional parameter is not blank, the database
 * URL becomes {@code url + "?" + optionalParameter}; otherwise the URL is re-set unchanged.
 * </p>
 *
 * @param request the current HTTP request (not read by this method)
 * @param ruleDatabaseEntity probe entity used to build the query example; it is modified
 * @return the matching entities with their URLs rewritten as described above
 * @throws GovernanceException if anything fails while querying or rewriting
 */
public List<RuleDatabaseEntity> getRuleDataBaseList(HttpServletRequest request, RuleDatabaseEntity ruleDatabaseEntity) throws GovernanceException {
    try {
        ruleDatabaseEntity.setSystemTag(false);
        Example<RuleDatabaseEntity> probe = Example.of(ruleDatabaseEntity);
        List<RuleDatabaseEntity> matches = ruleDatabaseRepository.findAll(probe);
        for (RuleDatabaseEntity entry : matches) {
            String baseUrl = entry.getDatabaseUrl();
            String effectiveUrl = StringUtil.isBlank(entry.getOptionalParameter())
                    ? baseUrl
                    : baseUrl + "?" + entry.getOptionalParameter();
            entry.setDatabaseUrl(effectiveUrl);
        }
        return matches;
    } catch (Exception e) {
        log.error("get ruleDatabaseList fail", e);
        throw new GovernanceException("get ruleDatabaseList fail", e);
    }
}
Example #2
Source File: RuleEngineService.java From WeEvent with Apache License 2.0 | 6 votes |
/**
 * Verifies that the fields needed to start a rule are present, then delegates to
 * {@code checkField} for the remaining checks.
 * <p>
 * Checks run in a fixed order (rule name, user id, broker id, broker URL); the first
 * missing value is logged and reported, and later checks are not evaluated.
 * </p>
 *
 * @param rule the rule to validate
 * @throws GovernanceException if a required field is blank or {@code null}
 */
private void checkStartRuleRequired(RuleEngineEntity rule) throws GovernanceException {
    String problem = null;
    if (StringUtil.isBlank(rule.getRuleName())) {
        problem = "the ruleName is empty";
    } else if (rule.getUserId() == null) {
        problem = "the userId is empty";
    } else if (rule.getBrokerId() == null) {
        problem = "the brokerId is empty";
    } else if (StringUtil.isBlank(rule.getBrokerUrl())) {
        problem = "the brokerUrl is empty";
    }

    if (problem != null) {
        log.error(problem);
        throw new GovernanceException(problem);
    }
    this.checkField(rule);
}
Example #3
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 6 votes |
@Test public void handlesInvalidDoctypes() { // would previously throw invalid name exception on empty doctype Document doc = Jsoup.parse("<!DOCTYPE>"); assertEquals( "<!doctype> <html> <head></head> <body></body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); doc = Jsoup.parse("<!DOCTYPE><html><p>Foo</p></html>"); assertEquals( "<!doctype> <html> <head></head> <body> <p>Foo</p> </body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); doc = Jsoup.parse("<!DOCTYPE \u0000>"); assertEquals( "<!doctype �> <html> <head></head> <body></body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); }
Example #4
Source File: Element.java From astor with GNU General Public License v2.0 | 6 votes |
/** * Get a CSS selector that will uniquely select this element. * <p> * If the element has an ID, returns #id; * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, * followed by a unique selector for the element (tag.class.class:nth-child(n)). * </p> * * @return the CSS Path that can be used to retrieve the element in a selector. */ public String cssSelector() { if (id().length() > 0) return "#" + id(); // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag String tagName = tagName().replace(':', '|'); StringBuilder selector = new StringBuilder(tagName); String classes = StringUtil.join(classNames(), "."); if (classes.length() > 0) selector.append('.').append(classes); if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node return selector.toString(); selector.insert(0, " > "); if (parent().select(selector.toString()).size() > 1) selector.append(String.format( ":nth-child(%d)", elementSiblingIndex() + 1)); return parent().cssSelector() + selector.toString(); }
Example #5
Source File: AbstractHACCommunicationManager.java From hybris-commerce-eclipse-plugin with Apache License 2.0 | 6 votes |
/**
 * Sends an HTTP GET request to {@link #endpointUrl} and stores the CSRF token extracted
 * from the response body in {@link #csrfToken}.
 * <p>
 * If the host cannot be resolved and the exception message equals the host captured from
 * the endpoint URL, the exception is replaced by one carrying a formatted message;
 * otherwise the original exception is rethrown.
 * </p>
 *
 * @throws IOException if the request fails, including an unresolvable host
 * @throws ClientProtocolException if the HTTP client reports a protocol error
 * @throws AuthenticationException if the extracted token is blank
 */
protected void fetchCsrfTokenFromHac() throws ClientProtocolException, IOException, AuthenticationException {
    final HttpGet getRequest = new HttpGet(getEndpointUrl());
    try {
        final HttpResponse response = httpClient.execute(getRequest, getContext());
        csrfToken = getCsrfToken(new BasicResponseHandler().handleResponse(response));
        if (StringUtil.isBlank(csrfToken)) {
            throw new AuthenticationException(ErrorMessage.CSRF_TOKEN_CANNOT_BE_OBTAINED);
        }
    } catch (UnknownHostException error) {
        final String reportedHost = error.getMessage();
        final Matcher hostMatcher = HACPreferenceConstants.HOST_REGEXP_PATTERN.matcher(getEndpointUrl());
        final boolean isEndpointHost = hostMatcher.find() && hostMatcher.group(1).equals(reportedHost);
        if (!isEndpointHost) {
            throw error;
        }
        throw new UnknownHostException(
                String.format(ErrorMessage.UNKNOWN_HOST_EXCEPTION_MESSAGE_FORMAT, hostMatcher.group(1)));
    }
}
Example #6
Source File: Tokeniser.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Drains the reader, replacing each character reference with the code point(s) it resolves to.
 * <p>
 * Text up to each {@code '&'} is copied verbatim. When the reference after an ampersand
 * does not resolve, a literal {@code '&'} is emitted instead.
 * </p>
 *
 * @param inAttribute passed through to {@code consumeCharacterReference}
 * @return the unescaped string read from the reader
 */
String unescapeEntities(boolean inAttribute) {
    StringBuilder out = StringUtil.stringBuilder();
    while (!reader.isEmpty()) {
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&')) {
            continue;
        }

        reader.consume();
        int[] codePoints = consumeCharacterReference(null, inAttribute);
        if (codePoints == null || codePoints.length == 0) {
            // Not a resolvable reference: keep the ampersand as plain text.
            out.append('&');
            continue;
        }

        out.appendCodePoint(codePoints[0]);
        if (codePoints.length == 2) {
            out.appendCodePoint(codePoints[1]);
        }
    }
    return out.toString();
}
Example #7
Source File: RuleEngineService.java From WeEvent with Apache License 2.0 | 6 votes |
/**
 * Copies the connection URL and database type of the rule's configured database onto the rule.
 * <p>
 * Does nothing when the rule has no database id or the id cannot be found. The URL is
 * assembled as {@code databaseUrl?user=..&password=..&tableName=..}, followed by
 * {@code &optionalParameter} when that value is not blank.
 * </p>
 * <p>
 * Only the credential-free base URL is logged: the assembled URL embeds the database
 * password and must not reach the log output.
 * </p>
 * NOTE(review): the user, password and table name are concatenated without URL-encoding;
 * confirm that callers never store values containing {@code &} or {@code =}.
 *
 * @param rule the rule to update in place
 */
private void setRuleDataBaseUrl(RuleEngineEntity rule) {
    if (rule.getRuleDataBaseId() == null) {
        return;
    }
    RuleDatabaseEntity ruleDataBase = ruleDatabaseRepository.findById(rule.getRuleDataBaseId());
    if (ruleDataBase == null) {
        return;
    }

    String dbUrl = ruleDataBase.getDatabaseUrl()
            + "?user=" + ruleDataBase.getUsername()
            + "&password=" + ruleDataBase.getPassword()
            + "&tableName=" + rule.getTableName();
    if (!StringUtil.isBlank(ruleDataBase.getOptionalParameter())) {
        dbUrl = dbUrl + "&" + ruleDataBase.getOptionalParameter();
    }
    rule.setDatabaseUrl(dbUrl);
    rule.setDatabaseType(ruleDataBase.getDatabaseType());

    // Log the base URL only; dbUrl carries the username and password.
    log.info("dataBaseUrl:{}", ruleDataBase.getDatabaseUrl());
}
Example #8
Source File: HtmlToPlainText.java From eclipse.jdt.ls with Eclipse Public License 2.0 | 6 votes |
@Override public void head(Node node, int depth) { String name = node.nodeName(); if (node instanceof TextNode) { append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. } else if (name.equals("ul")) { listNesting++; } else if (name.equals("li")) { append("\n "); for (int i = 1; i < listNesting; i++) { append(" "); } if (listNesting == 1) { append("* "); } else { append("- "); } } else if (name.equals("dt")) { append(" "); } else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) { append("\n"); } }
Example #9
Source File: RestAPINetworkManager.java From Shaarlier with GNU General Public License v3.0 | 5 votes |
@Override public Link prefetchLinkData(Link link) throws IOException { // TODO: There might be some bugs here, e.g: // - If the scheme used is not the same that on the saved link // - If there are tracking tags that don't match // We might want to open an Issue on Shaarli to get feedback String url = new URL(this.mAccount.getUrlShaarli() + LINK_URL).toExternalForm(); String body = this.newConnection(url, Connection.Method.GET) .data("offset", "0") .data("limit", "1") .data("searchterm", link.getUrl()) .execute() .body(); Log.d("RestAPI:prefetch", body); Link updatedLink = new Link(link); try { JSONArray resp = new JSONArray(body); if (resp.length() < 1) { Log.i("RestAPI:prefetch", "New link"); } else { Log.i("RestAPI:prefetch", "Found 1 link result (not new link)"); JSONObject returnedLink = resp.getJSONObject(0); updatedLink.setUrl(returnedLink.getString("url")); updatedLink.setTitle(returnedLink.getString("title")); updatedLink.setDescription(returnedLink.getString("description")); updatedLink.setPrivate(returnedLink.getBoolean("private")); JSONArray jsonTags = returnedLink.getJSONArray("tags"); ArrayList<String> tags = new ArrayList<>(); for (int i = 0; i < jsonTags.length(); i++) { tags.add(jsonTags.getString(i)); } updatedLink.setTags(StringUtil.join(tags, ", ")); } } catch (JSONException e) { Log.e("RestAPI:prefetch", e.toString()); } return updatedLink; }
Example #10
Source File: UrlConnectTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException { // should auto-detect xml and use XML parser, unless explicitly requested the html parser String xmlUrl = "http://direct.infohound.net/tools/parse-xml.xml"; Connection con = Jsoup.connect(xmlUrl).parser(Parser.htmlParser()); Document doc = con.get(); Connection.Request req = con.request(); assertTrue(req.parser().getTreeBuilder() instanceof HtmlTreeBuilder); assertEquals("<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); }
Example #11
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void doesNotFindShortestMatchingEntity() { // previous behaviour was to identify a possible entity, then chomp down the string until a match was found. // (as defined in html5.) However in practise that lead to spurious matches against the author's intent. String html = "One &clubsuite; ♣"; Document doc = Jsoup.parse(html); assertEquals(StringUtil.normaliseWhitespace("One &clubsuite; ♣"), doc.body().html()); }
Example #12
Source File: XmlTreeBuilderTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * With the XML parser, an {@code <?xml ...?>} prolog must become a declaration node and a
 * trailing comment must become a comment node.
 */
@Test
public void handlesXmlDeclarationAsDeclaration() {
    String xml = "<?xml encoding='UTF-8' ?><body>One</body><!-- comment -->";
    Document doc = Jsoup.parse(xml, "", Parser.xmlParser());

    String normalised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<?xml encoding='UTF-8' ?> <body> One </body> <!-- comment -->", normalised);

    // Child 0 is the declaration, child 2 the comment.
    assertEquals("#declaration", doc.childNode(0).nodeName());
    assertEquals("#comment", doc.childNode(2).nodeName());
}
Example #13
Source File: HtmlToPlainText.java From astor with GNU General Public License v2.0 | 5 votes |
private void append(String text) { if (text.startsWith("\n")) width = 0; // reset counter if starts with a newline. only from formats above, not in natural text if (text.equals(" ") && (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n"))) return; // don't accumulate long runs of empty spaces if (text.length() + width > maxWidth) { // won't fit, needs to wrap String words[] = text.split("\\s+"); for (int i = 0; i < words.length; i++) { String word = words[i]; boolean last = i == words.length - 1; if (!last) // insert a space if not the last word word = word + " "; if (word.length() + width > maxWidth) { // wrap and reset counter accum.append("\n").append(word); width = word.length(); } else { accum.append(word); width += word.length(); } } } else { // fits as is, without need to wrap text accum.append(text); width += text.length(); } }
Example #14
Source File: AbstractHACCommunicationManager.java From hybris-commerce-eclipse-plugin with Apache License 2.0 | 5 votes |
/**
 * Extracts the CSRF token from an HTML response body.
 * <p>
 * The body is parsed as HTML and the token is read from the content attribute of the
 * element selected by {@code Meta.CSRF_META_TAG}.
 * </p>
 *
 * @param responseBody response body of the GET request
 * @return the CSRF token read from the document
 * @throws AuthenticationException if {@code responseBody} is blank
 */
protected String getCsrfToken(String responseBody) throws AuthenticationException {
    if (!StringUtil.isBlank(responseBody)) {
        final Document parsed = Jsoup.parse(responseBody);
        return parsed.select(Meta.CSRF_META_TAG).attr(Meta.CSRF_META_TAG_CONTENT);
    }
    throw new AuthenticationException(ErrorMessage.CSRF_RESPONSE_CANNOT_BE_BLANK);
}
Example #15
Source File: HtmlToPlainText.java From intellij-quarkus with Eclipse Public License 2.0 | 5 votes |
/**
 * Called after a node's children have been visited; emits the plain-text suffix for that node.
 * <p>
 * The listed block-level tags end with a newline, table cells with a space, anchors with
 * their absolute {@code href} in angle brackets, and {@code ul} decreases the list
 * nesting level.
 * </p>
 *
 * @param node the node being left
 * @param depth the node's depth in the tree (unused)
 */
@Override
public void tail(Node node, int depth) {
    String name = node.nodeName();

    if (StringUtil.in(name, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5")) {
        append("\n");
        return;
    }
    if (StringUtil.in(name, "th", "td")) {
        append(" ");
        return;
    }
    if (name.equals("a")) {
        String target = node.absUrl("href");
        append(String.format(" <%s>", target));
        return;
    }
    if (name.equals("ul")) {
        listNesting--;
    }
}
Example #16
Source File: DocumentType.java From jsoup-learning with MIT License | 5 votes |
/**
 * Writes this doctype as {@code <!DOCTYPE name [PUBLIC "publicId"] ["systemId"]>}.
 * The public and system identifiers are written only when they are not blank.
 *
 * @param accum the builder to append to
 * @param depth the node depth (unused)
 * @param out the output settings (unused)
 */
@Override
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
    accum.append("<!DOCTYPE ").append(attr("name"));

    String publicId = attr("publicId");
    if (!StringUtil.isBlank(publicId)) {
        accum.append(" PUBLIC \"").append(publicId).append("\"");
    }

    String systemId = attr("systemId");
    if (!StringUtil.isBlank(systemId)) {
        accum.append(" \"").append(systemId).append("\"");
    }

    accum.append('>');
}
Example #17
Source File: UrlConnectTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException { // should auto-detect xml and use XML parser, unless explicitly requested the html parser String xmlUrl = "http://direct.infohound.net/tools/parse-xml.xml"; Connection con = Jsoup.connect(xmlUrl).parser(Parser.htmlParser()); Document doc = con.get(); Connection.Request req = con.request(); assertTrue(req.parser().getTreeBuilder() instanceof HtmlTreeBuilder); assertEquals("<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); }
Example #18
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * With case-preserving parse settings, tag names must keep the case they had in the input.
 */
@Test
public void caseSensitiveParseTree() {
    Parser parser = Parser.htmlParser();
    parser.settings(ParseSettings.preserveCase);

    Document doc = parser.parseInput("<r><X>A</X><y>B</y></r>", "");

    String bodyHtml = StringUtil.normaliseWhitespace(doc.body().html());
    assertEquals("<r> <X> A </X> <y> B </y> </r>", bodyHtml);
}
Example #19
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void doesNotFindShortestMatchingEntity() { // previous behaviour was to identify a possible entity, then chomp down the string until a match was found. // (as defined in html5.) However in practise that lead to spurious matches against the author's intent. String html = "One &clubsuite; ♣"; Document doc = Jsoup.parse(html); assertEquals(StringUtil.normaliseWhitespace("One &clubsuite; ♣"), doc.body().html()); }
Example #20
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void handleNullContextInParseFragment() { String html = "<ol><li>One</li></ol><p>Two</p>"; List<Node> nodes = Parser.parseFragment(html, null, "http://example.com/"); assertEquals(1, nodes.size()); // returns <html> node (not document) -- no context means doc gets created assertEquals("html", nodes.get(0).nodeName()); assertEquals("<html> <head></head> <body> <ol> <li>One</li> </ol> <p>Two</p> </body> </html>", StringUtil.normaliseWhitespace(nodes.get(0).outerHtml())); }
Example #21
Source File: HtmlTreeBuilder.java From jsoup-learning with MIT License | 5 votes |
/**
 * Walks the element stack from its most recent entry backwards, looking for a target
 * element before any scope boundary is hit.
 *
 * @param targetNames names that count as a match
 * @param baseTypes names that end the search without a match
 * @param extraTypes further names that end the search without a match; may be {@code null}
 * @return {@code true} if a target name is met first, {@code false} if a boundary is met first
 */
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    for (Iterator<Element> cursor = stack.descendingIterator(); cursor.hasNext(); ) {
        String elName = cursor.next().nodeName();
        if (StringUtil.in(elName, targetNames)) {
            return true;
        }
        boolean boundary = StringUtil.in(elName, baseTypes)
                || (extraTypes != null && StringUtil.in(elName, extraTypes));
        if (boundary) {
            return false;
        }
    }
    // Reaching this point means the whole stack held neither a target nor a boundary.
    Validate.fail("Should not be reachable");
    return false;
}
Example #22
Source File: UrlConnectTest.java From astor with GNU General Public License v2.0 | 5 votes |
@Test public void fetchHandlesXml() throws IOException { // should auto-detect xml and use XML parser, unless explicitly requested the html parser String xmlUrl = "http://direct.infohound.net/tools/parse-xml.xml"; Connection con = Jsoup.connect(xmlUrl); Document doc = con.get(); Connection.Request req = con.request(); assertTrue(req.parser().getTreeBuilder() instanceof XmlTreeBuilder); assertEquals("<xml> <link> one </link> <table> Two </table> </xml>", StringUtil.normaliseWhitespace(doc.outerHtml())); }
Example #23
Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Removes elements from the end of the stack until one of the named elements has been
 * removed, or the stack is empty.
 *
 * @param elNames names of the elements that end the popping once removed
 */
void popStackToClose(String... elNames) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        Element popped = stack.get(pos);
        stack.remove(pos);
        if (StringUtil.in(popped.nodeName(), elNames)) {
            return;
        }
        pos--;
    }
}
Example #24
Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Removes elements from the end of the stack until the last one is either one of the
 * named nodes or {@code html}; that element itself stays on the stack.
 *
 * @param nodeNames names of the context elements to stop at
 */
private void clearStackToContext(String... nodeNames) {
    for (int pos = stack.size() - 1; pos >= 0; pos--) {
        String currentName = stack.get(pos).nodeName();
        boolean reachedContext = StringUtil.in(currentName, nodeNames) || currentName.equals("html");
        if (reachedContext) {
            return;
        }
        stack.remove(pos);
    }
}
Example #25
Source File: HtmlParserTest.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * A self-closing non-void tag must be recorded as a parse error, make the input invalid
 * against the relaxed whitelist, and be cleaned into an explicit open/close pair.
 */
@Test
public void selfClosingOnNonvoidIsError() {
    String html = "<p>test</p><div /><div>Two</div>";

    // Exactly one tracked error is expected.
    Parser parser = Parser.htmlParser().setTrackErrors(5);
    parser.parseInput(html, "");
    assertEquals(1, parser.getErrors().size());
    String firstError = parser.getErrors().get(0).toString();
    assertEquals("18: Tag cannot be self closing; not a void tag", firstError);

    assertFalse(Jsoup.isValid(html, Whitelist.relaxed()));

    String cleaned = Jsoup.clean(html, Whitelist.relaxed());
    assertEquals("<p>test</p> <div></div> <div> Two </div>", StringUtil.normaliseWhitespace(cleaned));
}
Example #26
Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0 | 5 votes |
boolean inSelectScope(String targetName) { for (int pos = stack.size() -1; pos >= 0; pos--) { Element el = stack.get(pos); String elName = el.nodeName(); if (elName.equals(targetName)) return true; if (!StringUtil.in(elName, TagSearchSelectScope)) // all elements except return false; } Validate.fail("Should not be reachable"); return false; }
Example #27
Source File: HtmlToPlainText.java From astor with GNU General Public License v2.0 | 5 votes |
public void head(Node node, int depth) { String name = node.nodeName(); if (node instanceof TextNode) append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. else if (name.equals("li")) append("\n * "); else if (name.equals("dt")) append(" "); else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) append("\n"); }
Example #28
Source File: Element.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Appends a text node's whole text to the accumulator, either verbatim or with its
 * whitespace normalised.
 * <p>
 * The text is copied as is when {@code preserveWhitespace} holds for the node's parent.
 * Otherwise it goes through {@code StringUtil.appendNormalisedWhitespace}, which is told
 * whether the accumulator already ends in whitespace.
 * </p>
 *
 * @param accum the builder to append to
 * @param textNode the text node whose text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    String wholeText = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode)) {
        boolean endsInWhitespace = TextNode.lastCharIsWhitespace(accum);
        StringUtil.appendNormalisedWhitespace(accum, wholeText, endsInWhitespace);
        return;
    }
    accum.append(wholeText);
}
Example #29
Source File: Element.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Appends a text node's whole text to the accumulator, either verbatim or with its
 * whitespace normalised.
 * <p>
 * The text is copied as is when {@code preserveWhitespace} holds for the node's parent.
 * Otherwise it goes through {@code StringUtil.appendNormalisedWhitespace}, which is told
 * whether the accumulator already ends in whitespace.
 * </p>
 *
 * @param accum the builder to append to
 * @param textNode the text node whose text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    String wholeText = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode)) {
        boolean endsInWhitespace = TextNode.lastCharIsWhitespace(accum);
        StringUtil.appendNormalisedWhitespace(accum, wholeText, endsInWhitespace);
        return;
    }
    accum.append(wholeText);
}
Example #30
Source File: HtmlTreeBuilder.java From astor with GNU General Public License v2.0 | 5 votes |
/**
 * Walks the element stack from its last element backwards, looking for a target element
 * before any scope boundary is hit.
 *
 * @param targetNames names that count as a match
 * @param baseTypes names that end the search without a match
 * @param extraTypes further names that end the search without a match; may be {@code null}
 * @return {@code true} if a target name is met first, {@code false} if a boundary is met first
 */
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        String elName = stack.get(pos).nodeName();
        if (StringUtil.in(elName, targetNames)) {
            return true;
        }
        boolean boundary = StringUtil.in(elName, baseTypes)
                || (extraTypes != null && StringUtil.in(elName, extraTypes));
        if (boundary) {
            return false;
        }
        pos--;
    }
    // Reaching this point means the whole stack held neither a target nor a boundary.
    Validate.fail("Should not be reachable");
    return false;
}