com.gargoylesoftware.htmlunit.ProxyConfig Java Examples

The following examples show how to use com.gargoylesoftware.htmlunit.ProxyConfig. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HostExtractor.java    From htmlunit with Apache License 2.0 5 votes vote down vote up
/**
 * Program entry point: collects host names from MDN into a set and then
 * hands that set, together with the {@code HostClassNameTest} source file,
 * to {@code ensure}.
 *
 * @param args optional proxy settings; when at least two values are given,
 *        {@code args[0]} is used as the proxy host and {@code args[1]} as
 *        the proxy port
 * @throws Exception if an error occurs
 */
public static void main(final String[] args) throws Exception {
    final Set<String> hostNames = new HashSet<>();
    final boolean proxyRequested = args.length >= 2;

    try (WebClient client = new WebClient(BrowserVersion.CHROME)) {
        if (proxyRequested) {
            final String proxyHost = args[0];
            final int proxyPort = Integer.parseInt(args[1]);

            final ProxyConfig proxy = new ProxyConfig(proxyHost, proxyPort);
            // Local traffic must not be routed through the proxy.
            proxy.addHostsToProxyBypass("localhost");
            client.getOptions().setProxyConfig(proxy);
        }

        fillMDNWebAPI(client, hostNames);
        fillMDNJavaScriptGlobalObjects(client, hostNames);

        // Path of the test source, derived from its fully qualified class name.
        final String relativeSourcePath = HostClassNameTest.class.getName().replace('.', '/');
        final File testSource = new File("src/test/java/" + relativeSourcePath + ".java");
        ensure(testSource, hostNames);
    }
}
 
Example #2
Source File: HtmlUnitPageLoader.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads the page described by {@code pageRequest} with an HtmlUnit
 * {@link WebClient} (JavaScript enabled, CSS disabled) and returns the
 * rendered markup parsed into a Jsoup {@link Document}.
 *
 * <p>Request parameters, cookies, headers, user agent, referrer, timeout and
 * proxy are taken from {@code pageRequest} when present.
 *
 * @param pageRequest the request to execute
 * @return the parsed document, or {@code null} if the URL is not valid, the
 *         page produced no XML, or an {@link IOException} occurred (the
 *         exception is logged)
 */
@Override
public Document load(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }

    // try-with-resources guarantees the client is closed on every exit path.
    try (WebClient webClient = new WebClient()) {
        URL url = new URL(pageRequest.getUrl());
        WebRequest webRequest = new WebRequest(url);

        // Client settings
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setDoNotTrackEnabled(false);
        webClient.getOptions().setUseInsecureSSL(!pageRequest.isValidateTLSCertificates());

        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            for (Map.Entry<String, String> paramItem : pageRequest.getParamMap().entrySet()) {
                webRequest.getRequestParameters().add(new NameValuePair(paramItem.getKey(), paramItem.getValue()));
            }
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            webClient.getCookieManager().setCookiesEnabled(true);
            // Bind each cookie to the target host; a cookie with an empty
            // domain does not match the request and would not be sent.
            String cookieDomain = url.getHost();
            for (Map.Entry<String, String> cookieItem : pageRequest.getCookieMap().entrySet()) {
                webClient.getCookieManager().addCookie(
                        new Cookie(cookieDomain, cookieItem.getKey(), cookieItem.getValue()));
            }
        }
        if (pageRequest.getHeaderMap() != null && !pageRequest.getHeaderMap().isEmpty()) {
            webRequest.setAdditionalHeaders(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent() != null) {
            webRequest.setAdditionalHeader("User-Agent", pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            webRequest.setAdditionalHeader("Referer", pageRequest.getReferrer());
        }

        webClient.getOptions().setTimeout(pageRequest.getTimeoutMillis());
        webClient.setJavaScriptTimeout(pageRequest.getTimeoutMillis());

        // Proxy
        Proxy proxy = pageRequest.getProxy();
        // A DIRECT proxy has a null address, so only configure a proxy when
        // a concrete socket address is available.
        if (proxy != null && proxy.address() instanceof InetSocketAddress) {
            InetSocketAddress address = (InetSocketAddress) proxy.address();
            boolean isSocks = proxy.type() == Proxy.Type.SOCKS;
            // getHostString() avoids the reverse DNS lookup getHostName() may do.
            webClient.getOptions().setProxyConfig(
                    new ProxyConfig(address.getHostString(), address.getPort(), isSocks));
        }

        // Issue the request
        if (pageRequest.isIfPost()) {
            webRequest.setHttpMethod(HttpMethod.POST);
        } else {
            webRequest.setHttpMethod(HttpMethod.GET);
        }
        HtmlPage page = webClient.getPage(webRequest);

        // Background scripts only exist once a page has been loaded, so the
        // wait has to happen after getPage() to have any effect.
        webClient.waitForBackgroundJavaScript(pageRequest.getTimeoutMillis());

        String pageAsXml = page.asXml();
        if (pageAsXml != null) {
            return Jsoup.parse(pageAsXml);
        }
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return null;
}