Java Code Examples for com.gargoylesoftware.htmlunit.WebRequest#setAdditionalHeaders()
The following examples show how to use com.gargoylesoftware.htmlunit.WebRequest#setAdditionalHeaders().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HtmlUnitPageLoader.java From xxl-crawler with GNU General Public License v3.0 | 4 votes |
@Override public Document load(PageRequest pageRequest) { if (!UrlUtil.isUrl(pageRequest.getUrl())) { return null; } WebClient webClient = new WebClient(); try { WebRequest webRequest = new WebRequest(new URL(pageRequest.getUrl())); // 请求设置 webClient.getOptions().setUseInsecureSSL(true); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setDoNotTrackEnabled(false); webClient.getOptions().setUseInsecureSSL(!pageRequest.isValidateTLSCertificates()); if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) { for (Map.Entry<String, String> paramItem : pageRequest.getParamMap().entrySet()) { webRequest.getRequestParameters().add(new NameValuePair(paramItem.getKey(), paramItem.getValue())); } } if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) { webClient.getCookieManager().setCookiesEnabled(true); for (Map.Entry<String, String> cookieItem : pageRequest.getCookieMap().entrySet()) { webClient.getCookieManager().addCookie(new Cookie("", cookieItem.getKey(), cookieItem.getValue())); } } if (pageRequest.getHeaderMap() != null && !pageRequest.getHeaderMap().isEmpty()) { webRequest.setAdditionalHeaders(pageRequest.getHeaderMap()); } if (pageRequest.getUserAgent() != null) { webRequest.setAdditionalHeader("User-Agent", pageRequest.getUserAgent()); } if (pageRequest.getReferrer() != null) { webRequest.setAdditionalHeader("Referer", pageRequest.getReferrer()); } webClient.getOptions().setTimeout(pageRequest.getTimeoutMillis()); webClient.setJavaScriptTimeout(pageRequest.getTimeoutMillis()); webClient.waitForBackgroundJavaScript(pageRequest.getTimeoutMillis()); // 代理 if (pageRequest.getProxy() != null) { InetSocketAddress address = (InetSocketAddress) pageRequest.getProxy().address(); boolean isSocks = pageRequest.getProxy().type() 
== Proxy.Type.SOCKS; webClient.getOptions().setProxyConfig(new ProxyConfig(address.getHostName(), address.getPort(), isSocks)); } // 发出请求 if (pageRequest.isIfPost()) { webRequest.setHttpMethod(HttpMethod.POST); } else { webRequest.setHttpMethod(HttpMethod.GET); } HtmlPage page = webClient.getPage(webRequest); String pageAsXml = page.asXml(); if (pageAsXml != null) { Document html = Jsoup.parse(pageAsXml); return html; } } catch (IOException e) { logger.error(e.getMessage(), e); } finally { if (webClient != null) { webClient.close(); } } return null; }
Example 2
Source File: HtmlUnitDownloder.java From gecco-htmlunit with MIT License | 4 votes |
public HttpResponse download(HttpRequest request, int timeout) throws DownloadException { try { URL url = new URL(request.getUrl()); WebRequest webRequest = new WebRequest(url); webRequest.setHttpMethod(HttpMethod.GET); if(request instanceof HttpPostRequest) {//post HttpPostRequest post = (HttpPostRequest)request; webRequest.setHttpMethod(HttpMethod.POST); List<NameValuePair> requestParameters = new ArrayList<NameValuePair>(); for(Map.Entry<String, Object> entry : post.getFields().entrySet()) { NameValuePair nvp = new NameValuePair(entry.getKey(), entry.getValue().toString()); requestParameters.add(nvp); } webRequest.setRequestParameters(requestParameters); } //header boolean isMobile = SpiderThreadLocal.get().getEngine().isMobile(); webRequest.setAdditionalHeader("User-Agent", UserAgent.getUserAgent(isMobile)); webRequest.setAdditionalHeaders(request.getHeaders()); //proxy HttpHost proxy = Proxys.getProxy(); if(proxy != null) { webRequest.setProxyHost(proxy.getHostName()); webRequest.setProxyPort(proxy.getPort()); } //timeout this.webClient.getOptions().setTimeout(timeout); //request,response webClient.getPage(webRequest); HtmlPage page = webClient.getPage(request.getUrl()); HttpResponse resp = new HttpResponse(); WebResponse webResponse = page.getWebResponse(); int status = webResponse.getStatusCode(); resp.setStatus(status); if(status == 302 || status == 301) { String redirectUrl = webResponse.getResponseHeaderValue("Location"); resp.setContent(UrlUtils.relative2Absolute(request.getUrl(), redirectUrl)); } else if(status == 200) { String content = page.asXml(); resp.setContent(content); resp.setRaw(webResponse.getContentAsStream()); String contentType = webResponse.getContentType(); resp.setContentType(contentType); String charset = getCharset(request.getCharset(), contentType); resp.setCharset(charset); } else { throw new DownloadException("ERROR : " + status); } return resp; } catch(Exception ex) { throw new DownloadException(ex); } }