Java Code Examples for org.jsoup.nodes.Document#selectFirst()
The following examples show how to use
org.jsoup.nodes.Document#selectFirst().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: NicoAudioSourceManager.java From kyoko with MIT License | 6 votes |
/**
 * Builds an audio track from the data embedded in a watch page.
 *
 * <p>Reads the {@code data-api-data} attribute of the {@code #js-initial-watch-data} element,
 * parses it as JSON, and takes the title and duration from its {@code video} object and the
 * uploader from {@code owner.nickname}.
 *
 * @param videoId  identifier of the video; also used to build the watch URL
 * @param document parsed watch page
 * @return the track, or {@code null} when the element or its data attribute is missing
 */
private AudioTrack extractTrackFromHtml(String videoId, Document document) {
    Element element = document.selectFirst("#js-initial-watch-data");
    if (element == null) {
        return null;
    }

    // jsoup's Attributes.get() returns "" (never null) for an attribute that is not set,
    // so the emptiness test is what actually detects a missing attribute.
    String data = element.attributes().get("data-api-data");
    if (data == null || data.isEmpty()) {
        return null;
    }

    JsonObject object = new JsonObject(data);
    JsonObject video = object.getJsonObject("video");
    String uploader = object.getJsonObject("owner").getString("nickname");
    String title = video.getString("title");
    // presumably seconds converted to milliseconds -- TODO confirm against the API payload
    long duration = video.getLong("duration") * 1000;

    return new NicoAudioTrack(
            new AudioTrackInfo(title, uploader, duration, videoId, false, getWatchUrl(videoId)),
            this);
}
Example 2
Source File: PluginStatusReportViewBuilderTest.java From kubernetes-elastic-agents with Apache License 2.0 | 6 votes |
/**
 * Renders the status report template for a mocked cluster (one node holding one pod) and
 * checks that the first link in the table body points at the agent status report of the
 * pod's job.
 */
@Test
public void shouldBuildStatusReportHtmlWithAgentStatusReportLink() throws IOException, TemplateException {
    // Mocked cluster -> node -> pod hierarchy
    final KubernetesPod mockedPod = mock(KubernetesPod.class);
    when(mockedPod.getJobIdentifier()).thenReturn(new JobIdentifier(3243546575676657L));
    when(mockedPod.getCreationTimestamp()).thenReturn(new Date());

    final KubernetesNode mockedNode = mock(KubernetesNode.class);
    when(mockedNode.getPods()).thenReturn(singletonList(mockedPod));

    final KubernetesCluster mockedCluster = mock(KubernetesCluster.class);
    when(mockedCluster.getNodes()).thenReturn(singletonList(mockedNode));
    when(mockedCluster.getPluginId()).thenReturn("cd.go.contrib.elastic.agent.kubernetes");

    // Render the report and parse the resulting HTML
    final PluginStatusReportViewBuilder viewBuilder = PluginStatusReportViewBuilder.instance();
    final String renderedHtml =
            viewBuilder.build(viewBuilder.getTemplate("status-report.template.ftlh"), mockedCluster);
    final Document page = Jsoup.parse(renderedHtml);

    final Element agentLink = page.selectFirst("tbody tr td a");
    System.out.println(agentLink);

    assertThat(
            agentLink.attr("href"),
            is("/go/admin/status_reports/cd.go.contrib.elastic.agent.kubernetes/agent/?job_id=3243546575676657"));
}
Example 3
Source File: HtmlAmpUtil.java From V2EX with GNU General Public License v3.0 | 6 votes |
/**
 * Builds a {@code Topic} from AMP HTML by reading the {@code .topic_*} elements of the page.
 *
 * <p>Every element is dereferenced without a null check, so a page lacking any of the
 * expected elements fails with a {@code NullPointerException}.
 *
 * @param ampHtml AMP markup of a topic page
 * @return the populated topic
 */
public static Topic getTopicWithReply(String ampHtml){
    final Topic result = new Topic();
    final Document page = Jsoup.parse(ampHtml);
    final Element nodeElement = page.selectFirst(".topic_node");

    final String authorName = page.selectFirst(".topic_author").text();
    final String authorAvatar = page.selectFirst(".topic_author_avatar").attr("src");
    final Member author = new Member(authorName, authorAvatar);

    result.setTitle(page.selectFirst(".topic_title").text());
    result.setMember(author);
    result.setContent(page.selectFirst(".topic_content").html());

    final String hitsText = page.selectFirst(".topic_hits").text();
    result.setClicks(HtmlUtil.matcherGroup1Int(Pattern.compile("(\\d+)"), hitsText));

    result.setAgo(page.selectFirst(".topic_created").text());

    final String nodeName = HtmlUtil.matcherGroup1(Pattern.compile("/go/(\\w+)"), nodeElement.html());
    final String nodeTitle = nodeElement.selectFirst("a").text();
    result.setNode(new Node(nodeName, nodeTitle));

    // NOTE(review): this overwrites the clicks value set from .topic_hits above; possibly a
    // different setter was intended for .topic_stats -- confirm with the page layout.
    final String statsText = page.selectFirst(".topic_stats").text();
    result.setClicks(HtmlUtil.matcherGroup1Int(Pattern.compile("(\\d+)"), statsText));

    return result;
}
Example 4
Source File: IPUtils.java From HttpProxy with GNU General Public License v3.0 | 6 votes |
public static String getMyIp() { try { String html = HttpUtils.getResponseContent(MY_IP_API); Document doc = Jsoup.parse(html); Element element = doc.selectFirst("div.tableNormal"); Element ele = element.selectFirst("table").select("td").get(1); String ip = element.selectFirst("a").text(); // System.out.println(ip); return ip; } catch (Exception e) { e.printStackTrace(); } return null; }
Example 5
Source File: GetRatings.java From schedge with MIT License | 6 votes |
/**
 * Extracts the numeric rating from a raw HTML page.
 *
 * <p>Walks {@code div#root} down to the rating numerator element and parses its content as
 * a float.
 *
 * @param rawData raw HTML; may be {@code null}
 * @return the rating, or {@code null} when the input is null/blank, when any expected element
 *         is missing, or when the rating text is not a number
 */
private static Float parseRating(String rawData) {
    // The null test has to come before trim(); the previous order dereferenced null first.
    if (rawData == null || rawData.trim().isEmpty()) {
        logger.warn("Got bad data: empty string");
        return null;
    }

    Document doc = Jsoup.parse(rawData.trim());
    Element body = doc.selectFirst("div#root");
    if (body == null) {
        return null;
    }

    // selectFirst() returns null when nothing matches, so each step is guarded.
    Element ratingBody = body.selectFirst("div.TeacherInfo__StyledTeacher-ti1fio-1.fIlNyU");
    if (ratingBody == null) {
        return null;
    }
    Element firstDiv = ratingBody.selectFirst("div");
    if (firstDiv == null) {
        return null;
    }
    Element ratingInnerBody = firstDiv.selectFirst("div.RatingValue__AvgRating-qw8sqy-1.gIgExh");
    if (ratingInnerBody == null) {
        return null;
    }
    Element numerator = ratingInnerBody.selectFirst("div.RatingValue__Numerator-qw8sqy-2.gxuTRq");
    if (numerator == null) {
        return null;
    }

    String ratingValue = numerator.html().trim();
    try {
        return Float.parseFloat(ratingValue);
    } catch (NumberFormatException exception) {
        logger.warn("The instructor exist but having N/A rating");
        return null;
    }
}
Example 6
Source File: ApkVersionHelper.java From XposedSmsCode with GNU General Public License v3.0 | 6 votes |
/**
 * Extracts version name and release notes from an app page.
 *
 * <p>The version name is the first {@code \d(\.\d)+} match in the page title, defaulting to
 * {@code "-1"}. The version info is the text of {@code .apk_left_title_info} inside the
 * {@code .apk_left_title} block containing "新版特性" ("new version features"), or
 * {@code null} when that block is absent.
 *
 * @param html page markup
 * @return the parsed version descriptor
 */
static ApkVersion parseFromCoolApk(String html) {
    final Document page = Jsoup.parse(html);
    if (page == null) {
        return new ApkVersion("-1", null);
    }

    // Version name from the <title>
    String versionName = "-1";
    final Element titleElement = page.selectFirst("title");
    if (titleElement != null) {
        final Matcher versionMatcher = Pattern.compile("\\d(\\.\\d)+").matcher(titleElement.text());
        if (versionMatcher.find()) {
            versionName = versionMatcher.group();
        }
    }

    // Release notes from the "new version features" block
    String versionInfo = null;
    final Element featureBlock = page.selectFirst(".apk_left_title:contains(新版特性)");
    final Element featureInfo =
            featureBlock == null ? null : featureBlock.selectFirst(".apk_left_title_info");
    if (featureInfo != null) {
        versionInfo = HtmlCompat
                .fromHtml(featureInfo.toString(), HtmlCompat.FROM_HTML_MODE_COMPACT)
                .toString()
                .trim();
    }

    return new ApkVersion(versionName, versionInfo);
}
Example 7
Source File: SpringTestAPITest.java From GreenSummer with GNU Lesser General Public License v2.1 | 5 votes |
@Test public void basicXSLTTestIsProcessedCorrectly() throws Exception { // Obtaining response and basic tests MvcResult response = this.mvc // .perform(get("/test")) // //.andDo(print()) // .andExpect(status().isOk()) .andExpect(content().contentTypeCompatibleWith(MediaType.TEXT_HTML)) .andExpect(content().string(containsString("Test label"))) // .andReturn() // ; // Check the model final Object model = response.getModelAndView().getModel().get(XsltConfiguration.XML_SOURCE_TAG); assertNotNull("Model object returned is not null", model); assertThat("Model object is of the appropriate class", model, instanceOf(App.class)); // App app = (App) model; // Further App checking... // Check the response Document html = Jsoup.parse(response.getResponse().getContentAsString()); Element headerElement = html.selectFirst("h1"); assertNotNull("We have a title", headerElement); assertThat("We have a title", "TEST", equalTo(headerElement.text())); }
Example 8
Source File: WhenBackendIsRevealJs.java From asciidoctorj with Apache License 2.0 | 5 votes |
/**
 * Converts {@code sample.adoc} with the reveal.js backend and checks the generated HTML:
 * the reveal.js stylesheets are linked and the {@code #diagram} slide embeds an SVG image as
 * a base64 data URI.
 */
@Test
public void should_create_simple_slides() throws IOException {
    String filename = "sample";
    File inputFile = new File("build/resources/test/" + filename + ".adoc");
    File outputFile1 = new File(inputFile.getParentFile(), filename + ".html");
    removeFileIfItExists(outputFile1);

    AsciidoctorInvoker.main(new String[]{
        "-b", "revealjs",
        "-r", "asciidoctor-diagram",
        "-a", "revealjsdir=https://cdn.jsdelivr.net/npm/reveal.js@3.9.2",
        inputFile.getAbsolutePath()
    });

    // Assert existence before parsing: parsing a missing file would throw an IOException and
    // hide the clearer assertion failure.
    assertThat(outputFile1.exists(), is(true));
    Document doc = Jsoup.parse(outputFile1, "UTF-8");

    List<String> stylesheets = doc.head().getElementsByTag("link").stream()
        .filter(element -> "stylesheet".equals(element.attr("rel")))
        .map(element -> element.attr("href"))
        .collect(toList());
    assertThat(stylesheets,
        hasItems(
            "https://cdn.jsdelivr.net/npm/reveal.js@3.9.2/css/reveal.css",
            "https://cdn.jsdelivr.net/npm/reveal.js@3.9.2/css/theme/black.css"));

    Element diagramSlide = doc.selectFirst("#diagram");
    assertThat(diagramSlide, notNullValue());

    Element diagram = diagramSlide.selectFirst("div.imageblock img");
    assertThat(diagram, notNullValue());
    assertThat(diagram.attr("src"), startsWith("data:image/svg+xml;base64,"));
}
Example 9
Source File: PorncomixParser.java From Hentoid with Apache License 2.0 | 5 votes |
@Override protected List<String> parseImages(@NonNull Content content) throws Exception { // Fetch the book gallery page Document doc = getOnlineDocument(content.getGalleryUrl()); if (null == doc) throw new ParseException("Document unreachable : " + content.getGalleryUrl()); Element mangaPagesContainer = doc.selectFirst(".reading-content script"); List<Element> galleryPages = doc.select("#dgwt-jg-2 a"); // same for zone List<Element> galleryPages2 = doc.select(".unite-gallery img"); // same for zone List<Element> bestPages = doc.select("#gallery-2 a"); return parseImages(mangaPagesContainer, galleryPages, galleryPages2, bestPages); }
Example 10
Source File: MailSteps.java From NoraUi with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Requests the activation link contained in a mail.
 *
 * <p>The message text is parsed as HTML, the first element matching {@code firstCssQuery} is
 * taken as the link, and its {@code href} is fetched through {@code httpService}. An
 * {@code HttpServiceException} is logged and reported through a {@code Result.Failure}.
 *
 * @param subjectMail   mail subject, used in the failure message
 * @param firstCssQuery CSS query locating the activation link
 * @param message       mail to inspect
 */
private void validateActivationLink(String subjectMail, String firstCssQuery, Message message)
        throws MessagingException, IOException, TechnicalException, FailureException {
    final Document mailBody = Jsoup.parse(getTextFromMessage(message));
    final Element activationLink = mailBody.selectFirst(firstCssQuery);
    try {
        final String httpResponse = httpService.get(activationLink.attr("href"));
        log.debug("response is {}.", httpResponse);
    } catch (final HttpServiceException e) {
        log.error(
                Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail),
                e);
        // NOTE(review): the instance is discarded; presumably the Failure constructor itself
        // reports the failure (the method declares FailureException) -- confirm.
        new Result.Failure<>(
                "",
                Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail),
                false,
                Context.getCallBack(Callbacks.RESTART_WEB_DRIVER));
    }
}
Example 11
Source File: HtmlUtil.java From V2EX with GNU General Public License v3.0 | 5 votes |
/**
 * Builds a {@code Topic}, including its reply list, from the HTML of a topic page.
 *
 * <p>Metadata comes from the page's {@code meta[property=...]} tags; title, author, click and
 * favourite counts are taken from the first {@code #Main > .box} element. The header box and
 * the meta tags are dereferenced without null checks, so a page lacking them fails with a
 * {@code NullPointerException}.
 *
 * @param html markup of the topic page
 * @return the populated topic
 */
public static Topic getTopicAndReplies(String html){
    final Topic result = new Topic();
    final Document page = Jsoup.parse(html);

    final Element headerBox = page.selectFirst("#Main > .box");
    final String headerMarkup = headerBox.toString();
    final Element statsSpan = page.selectFirst("#Main > .box > .cell > span");
    final Element contentBlock = headerBox.selectFirst(".topic_content");
    final Element subtleBlock = headerBox.selectFirst(".subtle");

    // Publication time: the 'T' and 'Z' of the timestamp are replaced by spaces before parsing
    final String publishedTime = page.selectFirst("meta[property=article:published_time]")
            .attr("content")
            .replaceAll("[TZ]", " ");
    result.setCreated(TimeUtil.strToTimestamp(publishedTime, null));

    // Id: first run of two or more digits in the og:url
    final String topicUrl = page.selectFirst("meta[property=og:url]").attr("content");
    result.setId(matcherGroup1Int(Pattern.compile("(\\d{2,})"), topicUrl));

    result.setTitle(headerBox.selectFirst(".header > h1").text());
    result.setClicks(matcherGroup1Int(PATTERN_TOPIC_CLICK, headerMarkup));

    final String smallMarkup = headerBox.selectFirst(".header > small").toString();
    result.setAgo(matcherGroup1(Pattern.compile("· ([^·]+) ·"), smallMarkup));

    result.setFavors(matcherGroup1Int(PATTERN_TOPIC_FAVORS, headerMarkup));

    // Rendered content: body (or a line break when absent) followed by the optional appendix
    final String contentMarkup = contentBlock == null ? "<br>" : contentBlock.toString();
    final String subtleMarkup = subtleBlock == null ? " " : subtleBlock.toString();
    result.setContent_rendered("\n" + contentMarkup + subtleMarkup + "\n\t---");

    final String username = matcherGroup1(PATTERN_TOPIC_USERNAME, headerMarkup);
    final String avatar = matcherGroup1(PATTERN_TOPIC_USER_AVATAR, headerMarkup);
    result.setMember(new Member(username, avatar));

    final String nodeTag = page.selectFirst("meta[property=article:tag]").attr("content");
    final String nodeSection = page.selectFirst("meta[property=article:section]").attr("content");
    result.setNode(new Node(nodeTag, nodeSection));

    if (statsSpan != null){
        final String statsMarkup = statsSpan.toString();
        // "直到" means "until"; the text after it is the last-touched time
        final String lastTouched = matcherGroup1(Pattern.compile("直到 ([^+]+)"), statsMarkup);
        result.setLast_touched(lastTouched.isEmpty() ? 0 : TimeUtil.strToTimestamp(lastTouched, null));
        result.setReplies(matcherGroup1Int(PATTERN_TOPIC_REPLY_COUNT, statsMarkup));
    }

    result.setReplyList(getReplies(page, result.getMember().getUsername()));
    return result;
}
Example 12
Source File: Abcyzf.java From runscore with Apache License 2.0 | 5 votes |
/**
 * Starts a payment through the "abcyzf" channel and returns the URL to redirect the payer to.
 *
 * <p>Sends the signed order parameters with an HTTP GET to the configured pay URL. When the
 * response contains a {@code script} element, the URL is cut out of its
 * {@code window.location.href='...'} statement; otherwise the raw response is returned.
 *
 * @param orderNo     merchant order number
 * @param amount      amount to pay
 * @param channelCode payment type forwarded as {@code type}
 * @return the pay URL
 * @throws BizException when the response is blank or cannot be processed
 */
@Override
public String startPay(String orderNo, Double amount, String channelCode) {
    final String merchantId = ConfigHolder.getConfigValue("abcyzf.pid");
    final String notifyUrl = ConfigHolder.getConfigValue("abcyzf.notifyUrl");
    final String returnUrl = ConfigHolder.getConfigValue("abcyzf.returnUrl");
    final String productName = ConfigHolder.getConfigValue("abcyzf.name");
    final String signType = "MD5";

    final Map<String, Object> requestParams = new HashMap<>();
    requestParams.put("pid", merchantId);
    requestParams.put("type", channelCode);
    requestParams.put("out_trade_no", orderNo);
    requestParams.put("notify_url", notifyUrl);
    requestParams.put("return_url", returnUrl);
    requestParams.put("name", productName);
    requestParams.put("money", String.valueOf(amount));
    requestParams.put("sign_type", signType);
    requestParams.put("sign", generateRequestSign(orderNo, String.valueOf(amount), channelCode));

    final String response = HttpUtil.get(ConfigHolder.getConfigValue("abcyzf.payUrl"), requestParams);
    System.err.println(response);
    if (StrUtil.isBlank(response)) {
        throw new BizException(BizError.发起支付异常);
    }

    try {
        final Element script = Jsoup.parse(response).selectFirst("script");
        if (script == null) {
            // No redirect script: the response itself is used as the pay URL
            return response;
        }
        final String afterPrefix = script.data().replace("window.location.href='", "");
        // Drop the last two characters (presumably the closing "';" -- TODO confirm)
        return afterPrefix.substring(0, afterPrefix.length() - 2);
    } catch (Exception e) {
        throw new BizException(BizError.发起支付异常);
    }
}
Example 13
Source File: RssMakeService.java From torrssen2 with MIT License | 5 votes |
/**
 * Builds a magnet URI from the download button of the page at {@code urlString}.
 *
 * <p>The info hash is taken from the button's {@code onclick} attribute, which must consist
 * entirely of a {@code magnet_link('...');} call (case-insensitive).
 *
 * @param urlString address of the page to load
 * @return the magnet URI, or {@code null} when the button is missing or its {@code onclick}
 *         does not match
 * @throws Exception when loading the page fails
 */
private String getMagnetString1(String urlString) throws Exception {
    Document doc = getDoc(urlString);
    Element el = doc.selectFirst(".btn.btn-success.btn-xs");
    if (el == null) {
        // selectFirst() returns null when nothing matches; report "not found" like the
        // no-match branch below instead of failing with a NullPointerException.
        return null;
    }

    Pattern pattern = Pattern.compile("magnet_link\\(\\'(.{1,})\\'\\);", Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(el.attr("onclick"));
    if (matcher.matches()) {
        return "magnet:?xt=urn:btih:" + matcher.group(1);
    }
    return null;
}
Example 14
Source File: SonosServiceRegistration.java From airsonic-advanced with GNU General Public License v3.0 | 5 votes |
/**
 * Downloads the page at {@code controllerUrl} and reads the CSRF token from it.
 *
 * @param controllerUrl address of the page to fetch
 * @return the {@code value} attribute of the first {@code input[name='csrfToken']} element,
 *         or {@code null} when the page has no such input
 * @throws IOException when the page cannot be fetched
 */
private String retrieveCsrfToken(String controllerUrl) throws IOException {
    final Element tokenInput = Jsoup.connect(controllerUrl)
            .get()
            .selectFirst("input[name='csrfToken']");
    return tokenInput == null ? null : tokenInput.attributes().get("value");
}
Example 15
Source File: ParseEnroll.java From schedge with MIT License | 5 votes |
/**
 * Prints the text of every {@code div} found in the {@code section.main > section} part of
 * the given HTML, skipping those whose text is exactly "Results" or "Okay".
 *
 * <p>Nothing is printed when the expected section is not present.
 *
 * @param data raw HTML to inspect
 */
public static void parseRegistrationNumber(String data) {
    Document secData = Jsoup.parse(data);
    Element body = secData.selectFirst("body");
    // selectFirst() returns null when nothing matches; guard instead of failing with an NPE
    Element section = body == null ? null : body.selectFirst("section.main > section");
    if (section == null) {
        return;
    }

    for (Element element : section.select("div")) {
        // text() walks the subtree on every call, so compute it once per element
        String text = element.text();
        if (text.equals("Results") || text.equals("Okay")) {
            continue;
        }
        System.out.println(text);
    }
}
Example 16
Source File: ParseSection.java From schedge with MIT License | 5 votes |
public static SectionAttribute parse(@NotNull String rawData) { logger.debug("parsing raw catalog section data into SectionAttribute..."); rawData = rawData.trim(); if (rawData.equals("")) { logger.warn("Got bad data: empty string"); return null; // the course doesn't exist } Document doc = Jsoup.parse(rawData); Element failed = doc.selectFirst("div.alert.alert-info"); if (failed != null) { logger.warn("Got bad data: " + failed.text()); return null; // the course doesn't exist } Elements elements = doc.select("a"); String link = null; for (Element element : elements) { String el = element.attr("href"); if (el.contains("mapBuilding")) { link = el; } } doc.select("a").unwrap(); doc.select("i").unwrap(); doc.select("b").unwrap(); Element outerDataSection = doc.selectFirst("body > section.main"); Element innerDataSection = outerDataSection.selectFirst("> section"); Element courseNameDiv = innerDataSection.selectFirst("> div.primary-head"); String courseName = courseNameDiv.text(); Elements dataDivs = innerDataSection.select("> div.section-content.clearfix"); Map<String, String> secData = parseSectionAttributes(dataDivs); return parsingElements(secData, courseName, link); }
Example 17
Source File: GetRatings.java From schedge with MIT License | 5 votes |
private static String parseLink(String rawData) { logger.debug("parsing raw RMP data to link..."); rawData = rawData.trim(); if (rawData == null || rawData.equals("")) { logger.warn("Got bad data: empty string"); return null; } Document doc = Jsoup.parse(rawData); Element body = doc.selectFirst("body.search_results"); Element container = body.selectFirst("div#container"); Element innerBody = container.selectFirst("div#body"); Element mainContent = innerBody.selectFirst("div#mainContent"); Element resBox = mainContent.selectFirst("div#searchResultsBox"); Element listings = resBox.selectFirst("div.listings-wrap"); if (listings == null) { return null; } Element innerListings = listings.selectFirst("ul.listings"); Elements professors = innerListings.select("li.listing.PROFESSOR"); for (Element element : professors) { String school = element.selectFirst("span.sub").toString(); //<- Bugs at this line if (school.contains("New York University") || school.contains("NYU")) { return element.selectFirst("a").attr("href").split("=")[1]; } } return null; }