Java Code Examples for org.jsoup.nodes.Document#selectFirst()

The following examples show how to use org.jsoup.nodes.Document#selectFirst(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: NicoAudioSourceManager.java    From kyoko with MIT License 6 votes vote down vote up
/**
 * Builds an audio track from the metadata embedded in a watch page.
 *
 * <p>The page is expected to contain a {@code #js-initial-watch-data} element whose
 * {@code data-api-data} attribute holds a JSON document with a {@code video} object
 * ({@code title}, {@code duration}) and an {@code owner} object ({@code nickname}).
 *
 * @param videoId  identifier of the video; passed to the track info and to {@code getWatchUrl}
 * @param document parsed watch page
 * @return the extracted track, or {@code null} when the element or its data attribute is missing
 */
private AudioTrack extractTrackFromHtml(String videoId, Document document) {
    Element element = document.selectFirst("#js-initial-watch-data");
    if (element == null) {
        return null;
    }

    // jsoup's Attributes#get returns "" (not null) for an absent attribute, so the
    // emptiness check is what actually keeps an empty payload away from the JSON parser.
    String data = element.attributes().get("data-api-data");
    if (data == null || data.isEmpty()) {
        return null;
    }

    JsonObject object = new JsonObject(data);
    JsonObject video = object.getJsonObject("video");

    String uploader = object.getJsonObject("owner").getString("nickname");
    String title = video.getString("title");
    // NOTE(review): presumably converts seconds to milliseconds — confirm against the API payload.
    long duration = video.getLong("duration") * 1000;

    return new NicoAudioTrack(new AudioTrackInfo(title, uploader, duration, videoId, false, getWatchUrl(videoId)), this);
}
 
Example 2
Source File: PluginStatusReportViewBuilderTest.java    From kubernetes-elastic-agents with Apache License 2.0 6 votes vote down vote up
/**
 * Renders the status report template for a mocked cluster (one node, one pod) and checks
 * that the first link inside the table body targets the agent status report of the pod's job.
 */
@Test
public void shouldBuildStatusReportHtmlWithAgentStatusReportLink() throws IOException, TemplateException {
  final String expectedHref =
      "/go/admin/status_reports/cd.go.contrib.elastic.agent.kubernetes/agent/?job_id=3243546575676657";

  // A single pod that belongs to a known job.
  KubernetesPod mockedPod = mock(KubernetesPod.class);
  when(mockedPod.getJobIdentifier()).thenReturn(new JobIdentifier(3243546575676657L));
  when(mockedPod.getCreationTimestamp()).thenReturn(new Date());

  // A single node hosting that pod.
  KubernetesNode mockedNode = mock(KubernetesNode.class);
  when(mockedNode.getPods()).thenReturn(singletonList(mockedPod));

  // A cluster made of that node, owned by the kubernetes elastic agent plugin.
  KubernetesCluster mockedCluster = mock(KubernetesCluster.class);
  when(mockedCluster.getNodes()).thenReturn(singletonList(mockedNode));
  when(mockedCluster.getPluginId()).thenReturn("cd.go.contrib.elastic.agent.kubernetes");

  PluginStatusReportViewBuilder viewBuilder = PluginStatusReportViewBuilder.instance();
  String renderedHtml =
      viewBuilder.build(viewBuilder.getTemplate("status-report.template.ftlh"), mockedCluster);

  Element firstLink = Jsoup.parse(renderedHtml).selectFirst("tbody tr td a");
  System.out.println(firstLink);

  assertThat(firstLink.attr("href"), is(expectedHref));
}
 
Example 3
Source File: HtmlAmpUtil.java    From V2EX with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses an AMP topic page into a {@link Topic}.
 *
 * <p>Every looked-up element ({@code .topic_node}, {@code .topic_author},
 * {@code .topic_author_avatar}, {@code .topic_title}, {@code .topic_content},
 * {@code .topic_hits}, {@code .topic_created}, {@code .topic_stats}) is dereferenced
 * without a null check, so a page missing any of them fails with a
 * {@link NullPointerException}.
 *
 * @param ampHtml markup of the AMP page
 * @return the populated topic
 */
public static Topic getTopicWithReply(String ampHtml) {
    Topic result = new Topic();
    Document page = Jsoup.parse(ampHtml);
    Element nodeElement = page.selectFirst(".topic_node");

    String authorName = page.selectFirst(".topic_author").text();
    String authorAvatar = page.selectFirst(".topic_author_avatar").attr("src");
    Member author = new Member(authorName, authorAvatar);

    result.setTitle(page.selectFirst(".topic_title").text());
    result.setMember(author);
    result.setContent(page.selectFirst(".topic_content").html());

    String hitsText = page.selectFirst(".topic_hits").text();
    result.setClicks(HtmlUtil.matcherGroup1Int(Pattern.compile("(\\d+)"), hitsText));

    result.setAgo(page.selectFirst(".topic_created").text());

    String nodeName = HtmlUtil.matcherGroup1(Pattern.compile("/go/(\\w+)"), nodeElement.html());
    String nodeTitle = nodeElement.selectFirst("a").text();
    result.setNode(new Node(nodeName, nodeTitle));

    // NOTE(review): this second setClicks call overwrites the value taken from .topic_hits
    // above; a different setter may have been intended for .topic_stats — confirm.
    String statsText = page.selectFirst(".topic_stats").text();
    result.setClicks(HtmlUtil.matcherGroup1Int(Pattern.compile("(\\d+)"), statsText));

    return result;
}
 
Example 4
Source File: IPUtils.java    From HttpProxy with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fetches the page at {@code MY_IP_API} and returns the text of the first anchor found
 * inside its {@code div.tableNormal} element.
 *
 * @return the anchor text, or {@code null} when fetching or parsing fails for any reason
 *     (the failure's stack trace is printed)
 */
public static String getMyIp() {
    try {
        String page = HttpUtils.getResponseContent(MY_IP_API);
        Element container = Jsoup.parse(page).selectFirst("div.tableNormal");

        // The second table cell is looked up but its value is not used. The lookup is kept
        // because it throws (and therefore makes this method return null) when the table is
        // missing or has fewer than two cells.
        // NOTE(review): the anchor may have been meant to be read from this cell — confirm.
        container.selectFirst("table").select("td").get(1);

        return container.selectFirst("a").text();
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
 
Example 5
Source File: GetRatings.java    From schedge with MIT License 6 votes vote down vote up
/**
 * Extracts the numeric rating from a raw instructor page.
 *
 * <p>The rating is read from the numerator element nested under {@code div#root}.
 *
 * @param rawData page markup; may be {@code null} or blank
 * @return the parsed rating, or {@code null} when the input is null/blank, when any of the
 *     expected elements is missing, or when the rating text is not a number
 */
private static Float parseRating(String rawData) {
  // Null must be checked before trimming; trimming first would throw on null input.
  String trimmed = rawData == null ? "" : rawData.trim();
  if (trimmed.isEmpty()) {
    logger.warn("Got bad data: empty string");
    return null;
  }

  Document doc = Jsoup.parse(trimmed);
  Element body = doc.selectFirst("div#root");
  if (body == null) {
    return null;
  }

  // Each selectFirst may return null when the page layout differs; bail out instead of
  // failing with a NullPointerException.
  Element ratingBody =
      body.selectFirst("div.TeacherInfo__StyledTeacher-ti1fio-1.fIlNyU");
  if (ratingBody == null) {
    return null;
  }
  Element ratingContainer = ratingBody.selectFirst("div");
  if (ratingContainer == null) {
    return null;
  }
  Element ratingInnerBody =
      ratingContainer.selectFirst("div.RatingValue__AvgRating-qw8sqy-1.gIgExh");
  if (ratingInnerBody == null) {
    return null;
  }
  Element numerator =
      ratingInnerBody.selectFirst("div.RatingValue__Numerator-qw8sqy-2.gxuTRq");
  if (numerator == null) {
    return null;
  }

  String ratingValue = numerator.html().trim();
  try {
    return Float.parseFloat(ratingValue);
  } catch (NumberFormatException exception) {
    // The page exists but shows a non-numeric rating.
    logger.warn("The instructor exist but having N/A rating");
    return null;
  }
}
 
Example 6
Source File: ApkVersionHelper.java    From XposedSmsCode with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Extracts version information from an app page.
 *
 * <p>The version name is the first {@code digit(.digit)+} sequence found in the page title
 * and defaults to {@code "-1"}. The version info is the text of the
 * {@code .apk_left_title_info} element nested in the {@code .apk_left_title} block that
 * contains the text "新版特性"; it stays {@code null} when that block is absent.
 *
 * @param html page markup
 * @return the version name and version info wrapped in an {@link ApkVersion}
 */
static ApkVersion parseFromCoolApk(String html) {
    Document page = Jsoup.parse(html);
    if (page == null) {
        return new ApkVersion("-1", null);
    }

    String versionName = "-1";
    Element titleElement = page.selectFirst("title");
    if (titleElement != null) {
        Matcher versionMatcher = Pattern.compile("\\d(\\.\\d)+").matcher(titleElement.text());
        if (versionMatcher.find()) {
            versionName = versionMatcher.group();
        }
    }

    String versionInfo = null;
    Element featureBlock = page.selectFirst(".apk_left_title:contains(新版特性)");
    Element featureInfo = featureBlock == null
            ? null
            : featureBlock.selectFirst(".apk_left_title_info");
    if (featureInfo != null) {
        versionInfo = HtmlCompat.fromHtml(featureInfo.toString(), HtmlCompat.FROM_HTML_MODE_COMPACT)
                .toString()
                .trim();
    }

    return new ApkVersion(versionName, versionInfo);
}
 
Example 7
Source File: SpringTestAPITest.java    From GreenSummer with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Requests {@code /test} and verifies the response status, content type and body, the
 * model object stored under the XSLT source tag, and the rendered {@code h1} title.
 */
@Test
public void basicXSLTTestIsProcessedCorrectly() throws Exception {

	// Perform the request and run the basic response checks
	MvcResult mvcResult = this.mvc
			.perform(get("/test"))
			.andExpect(status().isOk())
			.andExpect(content().contentTypeCompatibleWith(MediaType.TEXT_HTML))
			.andExpect(content().string(containsString("Test label")))
			.andReturn();

	// The model must expose an App instance under the XSLT source tag
	final Object xmlSource = mvcResult.getModelAndView().getModel().get(XsltConfiguration.XML_SOURCE_TAG);
	assertNotNull("Model object returned is not null", xmlSource);
	assertThat("Model object is of the appropriate class", xmlSource, instanceOf(App.class));

	// The rendered page must carry the expected title
	String renderedHtml = mvcResult.getResponse().getContentAsString();
	Element title = Jsoup.parse(renderedHtml).selectFirst("h1");
	assertNotNull("We have a title", title);
	assertThat("We have a title", "TEST", equalTo(title.text()));
}
 
Example 8
Source File: WhenBackendIsRevealJs.java    From asciidoctorj with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the sample document with the reveal.js backend and verifies that the generated
 * HTML links the reveal.js stylesheets from the configured directory and embeds the diagram
 * as an inline SVG data URI.
 */
@Test
public void should_create_simple_slides() throws IOException {
    String filename = "sample";
    File inputFile = new File("build/resources/test/" + filename + ".adoc");
    File outputFile1 = new File(inputFile.getParentFile(), filename + ".html");
    removeFileIfItExists(outputFile1);

    AsciidoctorInvoker.main(new String[]{
        "-b", "revealjs",
        "-r", "asciidoctor-diagram",
        "-a", "revealjsdir=https://cdn.jsdelivr.net/npm/reveal.js@3.9.2",
        inputFile.getAbsolutePath()
    });

    // Assert the output exists before parsing it, so that a failed conversion is reported
    // as an assertion failure rather than as an IOException from the parser.
    assertThat(outputFile1.exists(), is(true));

    Document doc = Jsoup.parse(outputFile1, "UTF-8");

    // Collect the href of every stylesheet link in the document head.
    List<String> stylesheets = doc.head().getElementsByTag("link").stream()
        .filter(element -> "stylesheet".equals(element.attr("rel")))
        .map(element -> element.attr("href"))
        .collect(toList());
    assertThat(stylesheets,
        hasItems(
            "https://cdn.jsdelivr.net/npm/reveal.js@3.9.2/css/reveal.css",
            "https://cdn.jsdelivr.net/npm/reveal.js@3.9.2/css/theme/black.css"));

    Element diagramSlide = doc.selectFirst("#diagram");
    assertThat(diagramSlide, notNullValue());

    Element diagram = diagramSlide.selectFirst("div.imageblock img");
    assertThat(diagram, notNullValue());

    assertThat(diagram.attr("src"), startsWith("data:image/svg+xml;base64,"));
}
 
Example 9
Source File: PorncomixParser.java    From Hentoid with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the gallery page of the given content and gathers the candidate image sources
 * found on it, then delegates the actual extraction to the four-argument overload.
 *
 * @param content content whose gallery URL is fetched
 * @return the image URLs produced by the overload
 * @throws ParseException when the gallery page cannot be obtained
 */
@Override
protected List<String> parseImages(@NonNull Content content) throws Exception {
    Document galleryDoc = getOnlineDocument(content.getGalleryUrl());
    if (galleryDoc == null) {
        throw new ParseException("Document unreachable : " + content.getGalleryUrl());
    }

    // Four alternative page layouts are probed; the overload decides which one to use.
    Element readerScript = galleryDoc.selectFirst(".reading-content script");
    List<Element> justifiedGalleryLinks = galleryDoc.select("#dgwt-jg-2 a");
    List<Element> uniteGalleryImages = galleryDoc.select(".unite-gallery img");
    List<Element> plainGalleryLinks = galleryDoc.select("#gallery-2 a");

    return parseImages(readerScript, justifiedGalleryLinks, uniteGalleryImages, plainGalleryLinks);
}
 
Example 10
Source File: MailSteps.java    From NoraUi with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Follows the activation link contained in a mail.
 *
 * <p>The mail text is parsed as HTML, the first element matching {@code firstCssQuery} is
 * taken as the link, and its {@code href} is requested through {@code httpService}. The
 * matched element is dereferenced without a null check.
 *
 * @param subjectMail   mail subject, used in the failure message
 * @param firstCssQuery CSS query selecting the activation link
 * @param message       mail to inspect
 */
private void validateActivationLink(String subjectMail, String firstCssQuery, Message message) throws MessagingException, IOException, TechnicalException, FailureException {
    final String mailBody = getTextFromMessage(message);
    final Element activationLink = Jsoup.parse(mailBody).selectFirst(firstCssQuery);
    try {
        final String target = activationLink.attr("href");
        final String answer = httpService.get(target);
        log.debug("response is {}.", answer);
    } catch (final HttpServiceException e) {
        log.error(Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail), e);
        // NOTE(review): the Failure is constructed but neither thrown nor stored here;
        // presumably its constructor reports the failure — confirm.
        new Result.Failure<>("", Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail), false, Context.getCallBack(Callbacks.RESTART_WEB_DRIVER));
    }
}
 
Example 11
Source File: HtmlUtil.java    From V2EX with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Parses a topic page into a {@link Topic}, including its list of replies.
 *
 * <p>The first {@code #Main > .box} element and the {@code article:published_time},
 * {@code og:url}, {@code article:tag} and {@code article:section} meta tags are dereferenced
 * without null checks, so a page missing any of them fails with a
 * {@link NullPointerException}. The content, "subtle" and reply-summary elements are optional.
 *
 * @param html markup of the topic page
 * @return the populated topic
 */
public static Topic getTopicAndReplies(String html){

        Topic topic = new Topic();
        Document document = Jsoup.parse(html);
        // First box under #Main; several fields below are extracted from its markup.
        Element header = document.selectFirst("#Main > .box");
        String headerHtml = header.toString();
        // Optional span carrying the reply count and last-touched time (see the block at the end).
        Element middleEle = document.selectFirst("#Main > .box > .cell > span");
        Element contentEle = header.selectFirst(".topic_content");
        Element subtleEle = header.selectFirst(".subtle");
        // Replace the 'T' and 'Z' characters of the timestamp with spaces before parsing it.
        String publishedTime = document.selectFirst("meta[property=article:published_time]")
                .attr("content")
                .replaceAll("[TZ]", " ");

        topic.setCreated(TimeUtil.strToTimestamp(publishedTime,null));
        // The id is the first run of two or more digits in the og:url meta content.
        topic.setId(matcherGroup1Int(Pattern.compile("(\\d{2,})"),
                document.selectFirst("meta[property=og:url]").attr("content")));
        topic.setTitle(header.selectFirst(".header > h1").text());
        topic.setClicks(matcherGroup1Int(PATTERN_TOPIC_CLICK, headerHtml));
        // The "ago" text is whatever sits between two '·' separators in the header's small element.
        topic.setAgo(matcherGroup1(Pattern.compile("· ([^·]+) ·"),
                header.selectFirst(".header > small").toString()));
        topic.setFavors(matcherGroup1Int(PATTERN_TOPIC_FAVORS, headerHtml));
        // Rendered content: content markup (or "<br>" when absent) followed by the subtle
        // markup (or a single space when absent), wrapped by a leading newline and a trailing marker.
        topic.setContent_rendered("\n"
                + (contentEle == null ? "<br>" : contentEle.toString())
                + (subtleEle == null ? " " : subtleEle.toString())
                + "\n\t---");
        topic.setMember(new Member(
                matcherGroup1(PATTERN_TOPIC_USERNAME, headerHtml),
                matcherGroup1(PATTERN_TOPIC_USER_AVATAR, headerHtml)));
        topic.setNode(new Node(
                document.selectFirst("meta[property=article:tag]").attr("content"),
                document.selectFirst("meta[property=article:section]").attr("content")));

        // Reply summary is only filled in when the summary span exists.
        if (middleEle != null){
            // Captures the text that follows "直到 " up to the next '+' character.
            String lastTouched = matcherGroup1(Pattern.compile("直到 ([^+]+)"), middleEle.toString());
            // An empty capture is stored as 0 instead of being parsed as a timestamp.
            topic.setLast_touched(lastTouched.isEmpty() ? 0 : TimeUtil.strToTimestamp(lastTouched,null));
            topic.setReplies(matcherGroup1Int(PATTERN_TOPIC_REPLY_COUNT, middleEle.toString()));
        }
        topic.setReplyList(getReplies(document, topic.getMember().getUsername()));
        return topic;
    }
 
Example 12
Source File: Abcyzf.java    From runscore with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a payment by sending a signed request to the configured pay URL.
 *
 * <p>When the response contains a {@code script} element, the pay URL is taken from its
 * content by removing the {@code window.location.href='} prefix and the last two
 * characters; otherwise the raw response is returned as the pay URL.
 *
 * @param orderNo     order number, sent as {@code out_trade_no}
 * @param amount      amount to pay, sent as {@code money}
 * @param channelCode payment channel, sent as {@code type}
 * @return the pay URL
 * @throws BizException when the response is blank or cannot be processed
 */
@Override
public String startPay(String orderNo, Double amount, String channelCode) {
	final String money = String.valueOf(amount);

	Map<String, Object> request = new HashMap<>();
	request.put("pid", ConfigHolder.getConfigValue("abcyzf.pid"));
	request.put("type", channelCode);
	request.put("out_trade_no", orderNo);
	request.put("notify_url", ConfigHolder.getConfigValue("abcyzf.notifyUrl"));
	request.put("return_url", ConfigHolder.getConfigValue("abcyzf.returnUrl"));
	request.put("name", ConfigHolder.getConfigValue("abcyzf.name"));
	request.put("money", money);
	request.put("sign_type", "MD5");
	request.put("sign", generateRequestSign(orderNo, money, channelCode));

	String response = HttpUtil.get(ConfigHolder.getConfigValue("abcyzf.payUrl"), request);
	System.err.println(response);
	if (StrUtil.isBlank(response)) {
		throw new BizException(BizError.发起支付异常);
	}

	try {
		Element script = Jsoup.parse(response).selectFirst("script");
		if (script == null) {
			// No redirect script: the response itself is used as the pay URL.
			return response;
		}
		String redirect = script.data().replace("window.location.href='", "");
		// Drop the last two characters that follow the URL in the script.
		return redirect.substring(0, redirect.length() - 2);
	} catch (Exception e) {
		throw new BizException(BizError.发起支付异常);
	}
}
 
Example 13
Source File: RssMakeService.java    From torrssen2 with MIT License 5 votes vote down vote up
/**
 * Builds a magnet URI from the page at the given URL.
 *
 * <p>The hash is read from the {@code onclick} attribute of the first
 * {@code .btn.btn-success.btn-xs} element, which must have the exact form
 * {@code magnet_link('<hash>');} (case-insensitive).
 *
 * @param urlString address of the page to inspect
 * @return the magnet URI, or {@code null} when the button is missing or its {@code onclick}
 *     attribute does not have the expected form
 * @throws Exception when the page cannot be loaded
 */
private String getMagnetString1(String urlString) throws Exception {
    Document doc = getDoc(urlString);

    Element el = doc.selectFirst(".btn.btn-success.btn-xs");
    if (el == null) {
        // No download button on the page: treat it like a non-matching onclick handler.
        return null;
    }

    Pattern pattern = Pattern.compile("magnet_link\\(\\'(.{1,})\\'\\);", Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(el.attr("onclick"));

    if (matcher.matches()) {
        return "magnet:?xt=urn:btih:" + matcher.group(1);
    } else {
        return null;
    }
}
 
Example 14
Source File: SonosServiceRegistration.java    From airsonic-advanced with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Fetches the page at the given URL and reads the CSRF token from it.
 *
 * @param controllerUrl address of the page to fetch
 * @return the {@code value} attribute of the first {@code input[name='csrfToken']} element,
 *     or {@code null} when the page has no such input
 * @throws IOException when the page cannot be fetched
 */
private String retrieveCsrfToken(String controllerUrl) throws IOException {
    Element tokenInput = Jsoup.connect(controllerUrl).get().selectFirst("input[name='csrfToken']");
    return tokenInput == null ? null : tokenInput.attributes().get("value");
}
 
Example 15
Source File: ParseEnroll.java    From schedge with MIT License 5 votes vote down vote up
/**
 * Prints the text of every {@code div} found in the first {@code section.main > section}
 * of the page body to standard output, skipping those whose text is exactly "Results" or
 * "Okay".
 *
 * <p>The body and section lookups are dereferenced without null checks.
 *
 * @param data page markup
 */
public static void parseRegistrationNumber(String data) {
  Element mainSection =
      Jsoup.parse(data).selectFirst("body").selectFirst("section.main > section");
  for (Element div : mainSection.select("div")) {
    String text = div.text();
    if (!text.equals("Results") && !text.equals("Okay")) {
      System.out.println(text);
    }
  }
}
 
Example 16
Source File: ParseSection.java    From schedge with MIT License 5 votes vote down vote up
/**
 * Parses raw catalog section markup into a {@link SectionAttribute}.
 *
 * <p>The course name is read from {@code div.primary-head} and the attributes from the
 * {@code div.section-content.clearfix} elements of the first section under
 * {@code body > section.main}. The last anchor whose {@code href} contains
 * {@code mapBuilding} is passed along as the link.
 *
 * @param rawData section markup
 * @return the parsed section, or {@code null} when the input is empty after trimming or the
 *     page contains a {@code div.alert.alert-info} element
 */
public static SectionAttribute parse(@NotNull String rawData) {
  logger.debug("parsing raw catalog section data into SectionAttribute...");

  final String trimmed = rawData.trim();
  if (trimmed.isEmpty()) {
    logger.warn("Got bad data: empty string");
    return null; // the course doesn't exist
  }

  Document page = Jsoup.parse(trimmed);
  Element alert = page.selectFirst("div.alert.alert-info");
  if (alert != null) {
    logger.warn("Got bad data: " + alert.text());
    return null; // the course doesn't exist
  }

  // Remember the last map link before the anchors are unwrapped below.
  String mapLink = null;
  for (Element anchor : page.select("a")) {
    String href = anchor.attr("href");
    if (href.contains("mapBuilding")) {
      mapLink = href;
    }
  }

  // Strip anchor, italic and bold tags while keeping their content.
  for (String tag : new String[] {"a", "i", "b"}) {
    page.select(tag).unwrap();
  }

  Element contentSection =
      page.selectFirst("body > section.main").selectFirst("> section");
  String courseName = contentSection.selectFirst("> div.primary-head").text();
  Elements attributeDivs =
      contentSection.select("> div.section-content.clearfix");
  Map<String, String> attributes = parseSectionAttributes(attributeDivs);

  return parsingElements(attributes, courseName, mapLink);
}
 
Example 17
Source File: GetRatings.java    From schedge with MIT License 5 votes vote down vote up
/**
 * Extracts the professor link parameter from a raw search-results page.
 *
 * <p>The listings are located by walking a fixed chain of nested elements. Among the
 * {@code li.listing.PROFESSOR} entries, the first one whose {@code span.sub} markup
 * mentions "New York University" or "NYU" wins, and the part of its first anchor's
 * {@code href} that follows the first {@code '='} is returned.
 *
 * @param rawData page markup; may be {@code null} or blank
 * @return the extracted value, or {@code null} when the input is null/blank, the page does
 *     not have the expected structure, or no matching professor is listed
 */
private static String parseLink(String rawData) {
  logger.debug("parsing raw RMP data to link...");
  // Null must be checked before trimming; trimming first would throw on null input.
  String trimmed = rawData == null ? "" : rawData.trim();
  if (trimmed.isEmpty()) {
    logger.warn("Got bad data: empty string");
    return null;
  }

  // Walk down the expected nesting one level at a time, giving up as soon as a level is
  // missing instead of failing with a NullPointerException.
  String[] path = {
      "body.search_results",
      "div#container",
      "div#body",
      "div#mainContent",
      "div#searchResultsBox",
      "div.listings-wrap",
      "ul.listings"
  };
  Element current = Jsoup.parse(trimmed);
  for (String selector : path) {
    current = current.selectFirst(selector);
    if (current == null) {
      return null;
    }
  }

  for (Element professor : current.select("li.listing.PROFESSOR")) {
    // Listings without a school label or a link cannot be matched; skip them.
    Element schoolSpan = professor.selectFirst("span.sub");
    Element anchor = professor.selectFirst("a");
    if (schoolSpan == null || anchor == null) {
      continue;
    }

    String school = schoolSpan.toString();
    if (school.contains("New York University") || school.contains("NYU")) {
      String[] hrefParts = anchor.attr("href").split("=");
      // Only use the link when it really carries a value after '='.
      if (hrefParts.length > 1) {
        return hrefParts[1];
      }
    }
  }

  return null;
}