Java Code Examples for org.jsoup.nodes.Document#title()
The following examples show how to use
org.jsoup.nodes.Document#title().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DynamicIp.java From rank with Apache License 2.0 | 6 votes |
/**
 * Checks connectivity by requesting a Baidu search results page and inspecting its title.
 *
 * @return {@code true} when the fetched page title contains the searched keyword;
 *         {@code false} otherwise, including whenever the request throws
 */
public static boolean isConnected() {
    try {
        // The timestamp parameter makes every request URL unique.
        Document page = Jsoup.connect("http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis())
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String pageTitle = page.title();
        LOGGER.info("搜索结果页面标题:" + pageTitle);
        return pageTitle != null && pageTitle.contains("杨尚川");
    } catch (Exception e) {
        // An unreachable network is an expected "not connected" result, so it is not logged.
        if (!"Network is unreachable".equals(e.getMessage())) {
            LOGGER.error("状态检查失败:" + e.getMessage());
        }
        return false;
    }
}
Example 2
Source File: UrlTitleAnnouncer.java From VileBot with MIT License | 6 votes |
/**
 * Fetches the HTML page at the given URL and returns its title.
 *
 * @param url http URI String
 * @return the title of the fetched document, or an empty string if fetching fails
 */
private String scrapeURLHTMLTitle( String url )
{
    try
    {
        return Jsoup.connect( url ).get().title();
    }
    catch ( IOException x )
    {
        // Best effort: report the failure on stderr and fall back to an empty title.
        System.err.format( "scrapeURLHTMLTitle BufferedReader error: %s%n", x );
        return "";
    }
}
Example 3
Source File: DynamicIp.java From superword with Apache License 2.0 | 6 votes |
/**
 * Probes connectivity by loading a Baidu search page and checking that the
 * searched keyword appears in the page title.
 *
 * @return {@code true} if the title contains the keyword, {@code false} in every other case
 */
public static boolean isConnected() {
    boolean connected = false;
    try {
        Document resultPage = Jsoup.connect("http://www.baidu.com/s?wd=杨尚川&t=" + System.currentTimeMillis())
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", "https://www.baidu.com")
                .header("Host", "www.baidu.com")
                .header("User-Agent", USER_AGENT)
                .ignoreContentType(true)
                .timeout(30000)
                .get();
        String resultTitle = resultPage.title();
        LOGGER.info("搜索结果页面标题:" + resultTitle);
        if (resultTitle != null && resultTitle.contains("杨尚川")) {
            connected = true;
        }
    } catch (Exception e) {
        // Only failures other than a plainly unreachable network are logged.
        boolean unreachable = "Network is unreachable".equals(e.getMessage());
        if (!unreachable) {
            LOGGER.error("状态检查失败:" + e.getMessage());
        }
    }
    return connected;
}
Example 4
Source File: ArticalRemoteDataSource.java From KotlinMVPRxJava2Dagger2GreenDaoRetrofitDemo with Apache License 2.0 | 6 votes |
private List<String> parseData(String html) { //jsoup解析数据 Document document = Jsoup.parse(html); String title = document.title(); ArrayList<String> strings = new ArrayList<>(); strings.add(title); Elements ul = document.getElementsByTag("ul"); for (Element element : ul) { if (ul.hasClass("panel_body itemlist")) { Elements a = element.getElementsByTag("a"); for (Element aa : a) { if (aa.ownText().length() > 20) strings.add(aa.ownText()); } } } return strings; }
Example 5
Source File: TwitchVideoRipper.java From ripme with MIT License | 6 votes |
@Override public void rip() throws IOException { LOGGER.info("Retrieving " + this.url); Document doc = Http.url(url).get(); //Get user friendly filename from page title String title = doc.title(); Elements script = doc.select("script"); if (script.isEmpty()) { throw new IOException("Could not find script code at " + url); } //Regex assumes highest quality source is listed first Pattern p = Pattern.compile("\"source\":\"(.*?)\""); for (Element element : script) { Matcher m = p.matcher(element.data()); if (m.find()){ String vidUrl = m.group(1); addURLToDownload(new URL(vidUrl), HOST + "_" + title); } } waitForThreads(); }
Example 6
Source File: JsoupTesting.java From Java-Data-Science-Cookbook with MIT License | 6 votes |
public void extractDataWithJsoup(String href){ Document doc = null; try { doc = Jsoup.connect(href).timeout(10*1000).userAgent("Mozilla").ignoreHttpErrors(true).get(); } catch (IOException e) { //Your exception handling here } if(doc != null){ String title = doc.title(); String text = doc.body().text(); Elements links = doc.select("a[href]"); for (Element link : links) { String linkHref = link.attr("href"); String linkText = link.text(); String linkOuterHtml = link.outerHtml(); String linkInnerHtml = link.html(); } } }
Example 7
Source File: ParserDemo.java From crawler4j with Apache License 2.0 | 5 votes |
/**
 * Parses the fetched HTML and logs the thread name, page title and URL.
 * Any exception raised while doing so is printed and otherwise ignored.
 *
 * @param result     fetch result that supplies the HTML
 * @param url        address the HTML was fetched from (only logged)
 * @param threadName name of the calling thread (only logged)
 * @param isUpdate   not used by this implementation
 */
@Override
public void parse(HttpFetchResult result, String url, String threadName, boolean isUpdate) {
    try {
        String pageTitle = Jsoup.parse(result.getHtml()).title();
        logger.info(threadName + " " + pageTitle + " " + url + " ");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 8
Source File: UtilsDemoActivity.java From UltimateAndroid with Apache License 2.0 | 5 votes |
@Override protected Void doInBackground(Void... params) { try { // Connect to the web site Document document = Jsoup.connect(url).get(); // Get the html document title title = document.title(); } catch (IOException e) { e.printStackTrace(); } return null; }
Example 9
Source File: UtilsDemoActivity.java From UltimateAndroid with Apache License 2.0 | 5 votes |
public void onHandleIntent(Intent intent) { this.url = intent.getStringExtra("url"); try { // Connect to the web site Document document = Jsoup.connect(url).get(); // Get the html document title title = document.title(); } catch (IOException e) { e.printStackTrace(); } Intent resultIntent = new Intent(TITLE_FILTER); resultIntent.putExtra("title", title); LocalBroadcastManager.getInstance(this).sendBroadcast(resultIntent); }
Example 10
Source File: DynamicIp.java From superword with Apache License 2.0 | 5 votes |
/**
 * Posts a WAN connection command to the status endpoint and reports whether
 * the resulting connectivity state matches the requested action.
 *
 * @param cookies cookies to attach to the request
 * @param action  value sent as the {@code action} form field; afterwards "3" is
 *                checked against a connected state and "4" against a disconnected one
 * @return {@code true} only if the response page title is "LAN | LAN Settings" and
 *         {@link #isConnected()} agrees with the requested action; {@code false} otherwise
 */
public static boolean execute(Map<String, String> cookies, String action){
    String url = "http://192.168.0.1/goform/SysStatusHandle";
    Map<String, String> map = new HashMap<>();
    map.put("action", action);
    map.put("CMD", "WAN_CON");
    map.put("GO", "system_status.asp");
    Connection conn = Jsoup.connect(url)
            .header("Accept", ACCEPT)
            .header("Accept-Encoding", ENCODING)
            .header("Accept-Language", LANGUAGE)
            .header("Connection", CONNECTION)
            .header("Host", HOST)
            .header("Referer", REFERER)
            .header("User-Agent", USER_AGENT)
            .ignoreContentType(true)
            .timeout(30000);
    // Iterate entries directly instead of looking every key up again.
    for (Map.Entry<String, String> cookie : cookies.entrySet()) {
        conn.cookie(cookie.getKey(), cookie.getValue());
    }
    String title = null;
    try {
        Connection.Response response = conn.method(Connection.Method.POST).data(map).execute();
        String html = response.body();
        Document doc = Jsoup.parse(html);
        title = doc.title();
        LOGGER.info("操作连接页面标题:"+title);
        // Pause before the connectivity state is checked below.
        Thread.sleep(10000);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        LOGGER.error(e.getMessage());
    } catch (Exception e) {
        LOGGER.error(e.getMessage());
    }
    if ("LAN | LAN Settings".equals(title)) {
        if (("3".equals(action) && isConnected()) || ("4".equals(action) && !isConnected())) {
            return true;
        }
    }
    return false;
}
Example 11
Source File: WxCrawlServiceImpl.java From wx-crawl with Apache License 2.0 | 5 votes |
/**
 * Resolves the article title from the first available source, in order:
 * the backup-title attribute on the document head, the text of the first
 * element matching the title selector, and finally the document's own title.
 *
 * @param sourceDoc parsed article document
 * @return the resolved title
 */
private String getArticleTitle(Document sourceDoc) {
    if (sourceDoc.head() != null) {
        String backupTitle = sourceDoc.head().attr(WxCrawlerConstant.BackupArticle.ARTICLE_TITLE);
        if (StringUtils.isNotEmpty(backupTitle)) {
            return backupTitle;
        }
    }
    if (sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first() != null) {
        return sourceDoc.select(WxCrawlerConstant.HTMLElementSelector.TITLE).first().text();
    }
    return sourceDoc.title();
}
Example 12
Source File: JSoupExamples.java From Java-for-Data-Science with MIT License | 5 votes |
public void displayBodyText(Document document) { // Displays the entire body of the document String title = document.title(); out.println("Title: " + title); out.println("---Body---"); Elements element = document.select("body"); out.println("Text: " + element.text()); }
Example 13
Source File: InternetBrowser.java From petscii-bbs with Mozilla Public License 2.0 | 5 votes |
public static List<Entry> getAllLinks(Document webpage) throws Exception { List<Entry> urls = new ArrayList<>(); //why String title = webpage.title(); Elements links = webpage.select("a[href]"); Element link; for(int j=0; j < links.size(); j++){ link=links.get(j); final String label = defaultIfBlank(link.text(), link.attr("href")); urls.add(new Entry(link.absUrl("href"), label)); } return urls; }
Example 14
Source File: JsoupHCalendarExtractor.java From wandora with GNU General Public License v3.0 | 5 votes |
/**
 * Creates (or looks up) a topic named after the document title, types it as
 * "vcalendar", and hands the document body to the topic-level parser.
 *
 * @param document the parsed document to process
 * @throws TopicMapException if a topic map operation fails
 */
private void parseCalendar(Document document) throws TopicMapException {
    String documentTitle = document.title();
    Topic calendarType = getType("vcalendar");

    Topic calendarTopic = getOrCreateTopic(tm, null, documentTitle);
    calendarTopic.addType(calendarType);

    parseCalendar(calendarTopic, document.body());
}
Example 15
Source File: ApiCatalogEndpointIntegrationTest.java From api-layer with Eclipse Public License 2.0 | 5 votes |
/**
 * Requests the invalid containers endpoint and verifies that the returned
 * HTML page has the expected 404 title, heading and dashboard link text.
 */
@Test
public void whenMisSpeltContainersEndpoint_thenNotFoundResponseWithAPIMessage() throws Exception {
    HttpResponse response = getResponse(INVALID_CONTAINER_ENDPOINT, HttpStatus.SC_NOT_FOUND);
    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    String pageTitle = page.title();
    assertNotNull(pageTitle);
    assertEquals("404 Not Found", pageTitle);
    assertEquals("404 Page Not Found", page.select("h1:first-child").text());
    assertEquals("Go to Dashboard", page.select("a").text());
}
Example 16
Source File: ShadowSocksCrawlerService.java From ShadowSocks-Share with Apache License 2.0 | 5 votes |
/**
 * Crawls the target page for ss accounts.
 *
 * @return an entity built from the fetched document (title, parsed account set, flag {@code true});
 *         if an {@link IOException} occurs, an entity with an empty title and flag {@code false}
 */
public ShadowSocksEntity getShadowSocks() {
    try {
        Document page = getDocument();
        ShadowSocksEntity crawled = new ShadowSocksEntity(getTargetURL(), page.title(), true, new Date());
        crawled.setShadowSocksSet(parse(page));
        return crawled;
    } catch (IOException e) {
        log.error(e.getMessage());
        // Fall back to a placeholder entity for the same target URL.
        return new ShadowSocksEntity(getTargetURL(), "", false, new Date());
    }
}
Example 17
Source File: AutoGetHtml.java From danyuan-application with Apache License 2.0 | 4 votes |
/** * @throws IOException * 方法名: getBody * 功 能: TODO(这里用一句话描述这个方法的作用) * 参 数: @param url * 参 数: @param key * 参 数: @return * 返 回: String * 作 者 : Tenghui.Wang * @throws */ public static String getBody(String url, String key) throws IOException { Document doc = Jsoup.connect("http://www.oschina.net/") .data("query", "Java") // 请求参数 .userAgent("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2") // 设置 User-Agent .cookie("auth", "token") // 设置 cookie .timeout(3000) // 设置连接超时时间 .post(); // 使用 POST 方法访问 URL return doc.title(); }
Example 18
Source File: AbstractHtmlConsumer.java From baleen with Apache License 2.0 | 4 votes |
/**
 * Builds an HTML document for the given CAS and writes it to the output file.
 *
 * <p>The head receives an optional stylesheet link, a {@code utf-8} charset
 * declaration, the document annotation fields and every {@code Metadata}
 * entry as meta elements; a metadata entry keyed "documentTitle" (case
 * insensitive) also sets the document title. The body is filled in by
 * {@code writeBody}.
 *
 * @param jCas the CAS to render
 * @throws AnalysisEngineProcessException if the file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
    final File f = getFileName(jCas);
    final DocumentAnnotation da = getDocumentAnnotation(jCas);

    final Document doc =
        Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
    doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
    final Element head = doc.head();

    if (!Strings.isNullOrEmpty(css)) {
        final Element cssLink = head.appendElement("link");
        cssLink.attr("rel", "stylesheet");
        cssLink.attr("href", css);
    }

    final Element charset = head.appendElement("meta");
    charset.attr("charset", "utf-8");

    appendMeta(head, "document.type", da.getDocType());
    appendMeta(head, "document.sourceUri", da.getSourceUri());
    appendMeta(head, "externalId", da.getHash());
    appendMeta(head, "document.classification", da.getDocumentClassification());
    appendMeta(
        head,
        "document.caveats",
        String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
    appendMeta(
        head,
        "document.releasability",
        String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

    String title = null;
    for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
        appendMeta(head, md.getKey(), md.getValue());
        if ("documentTitle".equalsIgnoreCase(md.getKey())) {
            title = md.getValue();
        }
    }

    if (!Strings.isNullOrEmpty(title)) {
        doc.title(title);
    }

    final Element body = doc.body();
    writeBody(jCas, body);

    try {
        // The head declares charset "utf-8", so the bytes on disk must be UTF-8 too;
        // the platform default charset would make the declaration wrong on other platforms.
        FileUtils.writeStringToFile(f, doc.html(), java.nio.charset.StandardCharsets.UTF_8);
    } catch (final IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 19
Source File: Utils.java From SteamGifts with MIT License | 4 votes |
/**
 * Returns the document title without a trailing page indicator.
 *
 * <p>On /giveaways/id/name/search?page=X the title has the form
 * "Game Title - Page X"; the " - Page X" suffix is removed.
 *
 * @param document the parsed page
 * @return the title with any trailing " - Page N" suffix stripped
 */
public static String getPageTitle(Document document) {
    return document.title().replaceAll(" - Page ([\\d,]+)$", "");
}
Example 20
Source File: HtmlUtils.java From ogham with Apache License 2.0 | 2 votes |
/**
 * Reads the title of an HTML document.
 *
 * @param htmlContent
 *            the HTML content that may contain a title
 * @return the document title, or null when the head contains no
 *         <code>title</code> element
 */
public static String getTitle(String htmlContent) {
    Document parsed = Jsoup.parse(htmlContent);
    if (parsed.select("head > title").isEmpty()) {
        return null;
    }
    return parsed.title();
}