us.codecraft.webmagic.proxy.Proxy Java Examples
The following examples show how to use
us.codecraft.webmagic.proxy.Proxy.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ContentLengthLimitHttpClientDownloader.java From Gather-Platform with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the HTTP client for the given site, building and caching one on first use.
 *
 * <p>Clients are stored in {@code httpClients} under the site's domain. When {@code site} is
 * {@code null} a client is requested from the generator and returned without consulting the
 * cache. The {@code proxy} argument is only passed on when a client actually has to be generated.
 *
 * @param site  the site whose domain keys the cache lookup; may be {@code null}
 * @param proxy the proxy forwarded to the generator when a new client is created
 * @return the cached or freshly generated client
 */
private CloseableHttpClient getHttpClient(Site site, Proxy proxy) {
    if (site == null) {
        return httpClientGenerator.getClient(null, proxy);
    }
    final String cacheKey = site.getDomain();
    CloseableHttpClient client = httpClients.get(cacheKey);
    if (client != null) {
        return client;
    }
    synchronized (this) {
        // Look again while holding the lock before generating a new client.
        client = httpClients.get(cacheKey);
        if (client == null) {
            client = httpClientGenerator.getClient(site, proxy);
            httpClients.put(cacheKey, client);
        }
        return client;
    }
}
Example #2
Source File: ContentLengthLimitHttpClientDownloader.java From spider with GNU General Public License v3.0 | 6 votes |
/**
 * Looks up the HTTP client registered for the site's domain, generating one if none exists yet.
 *
 * <p>A {@code null} site bypasses the {@code httpClients} map entirely and asks the generator
 * for a client directly.
 *
 * @param site  the site to resolve a client for; may be {@code null}
 * @param proxy the proxy given to the generator whenever a client is generated
 * @return the client to use for the site
 */
private CloseableHttpClient getHttpClient(Site site, Proxy proxy) {
    if (site == null) {
        return httpClientGenerator.getClient(null, proxy);
    }
    final String siteDomain = site.getDomain();
    CloseableHttpClient existing = httpClients.get(siteDomain);
    if (existing != null) {
        return existing;
    }
    synchronized (this) {
        // Re-read the map inside the synchronized section before creating anything.
        CloseableHttpClient resolved = httpClients.get(siteDomain);
        if (resolved == null) {
            resolved = httpClientGenerator.getClient(site, proxy);
            httpClients.put(siteDomain, resolved);
        }
        return resolved;
    }
}
Example #3
Source File: HttpUriRequestConverter.java From webmagic with Apache License 2.0 | 6 votes |
/**
 * Builds the {@link HttpClientContext} that accompanies a request.
 *
 * <p>Two optional pieces are attached:
 * <ul>
 *   <li>a proxy auth state carrying the proxy's username and password, when a proxy with a
 *       non-null username is supplied;</li>
 *   <li>a cookie store holding the request's cookies, each bound to the request URL's domain
 *       with the port removed, when the request has at least one cookie.</li>
 * </ul>
 *
 * @param request the request whose cookies and URL are read
 * @param site    not used by this method
 * @param proxy   the proxy whose credentials are applied; may be {@code null}
 * @return the populated context
 */
private HttpClientContext convertHttpClientContext(Request request, Site site, Proxy proxy) {
    HttpClientContext context = new HttpClientContext();

    boolean proxyHasCredentials = proxy != null && proxy.getUsername() != null;
    if (proxyHasCredentials) {
        AuthState proxyAuthState = new AuthState();
        BasicScheme proxyScheme = new BasicScheme(ChallengeState.PROXY);
        UsernamePasswordCredentials proxyCredentials =
                new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword());
        proxyAuthState.update(proxyScheme, proxyCredentials);
        context.setAttribute(HttpClientContext.PROXY_AUTH_STATE, proxyAuthState);
    }

    Map<String, String> cookies = request.getCookies();
    if (cookies == null || cookies.isEmpty()) {
        return context;
    }

    // Every cookie is scoped to the same domain, derived from the request URL.
    String cookieDomain = UrlUtils.removePort(UrlUtils.getDomain(request.getUrl()));
    CookieStore store = new BasicCookieStore();
    for (Map.Entry<String, String> entry : cookies.entrySet()) {
        BasicClientCookie cookie = new BasicClientCookie(entry.getKey(), entry.getValue());
        cookie.setDomain(cookieDomain);
        store.addCookie(cookie);
    }
    context.setCookieStore(store);
    return context;
}
Example #4
Source File: HttpClientDownloaderTest.java From webmagic with Apache License 2.0 | 6 votes |
/**
 * Checks that a proxy configured with a username and password through
 * {@code SimpleProxyProvider} results in the expected {@code Proxy-Authorization} header:
 * the stub server only answers "ok" when that header matches.
 */
@Test
public void test_download_auth_by_SimpleProxyProvider() throws Exception {
    final int proxyPort = 13423;
    HttpServer server = httpServer(proxyPort);
    server.get(eq(header("Proxy-Authorization"), "Basic dXNlcm5hbWU6cGFzc3dvcmQ="))
            .response("ok");

    Runner.running(server, new Runnable() {
        @Override
        public void run() throws Exception {
            // The stub server itself plays the role of the authenticated proxy.
            Proxy authenticatedProxy = new Proxy("127.0.0.1", proxyPort, "username", "password");
            HttpClientDownloader downloader = new HttpClientDownloader();
            downloader.setProxyProvider(SimpleProxyProvider.from(authenticatedProxy));

            Request request = new Request();
            request.setUrl("http://www.baidu.com");

            Page page = downloader.download(request, Site.me().toTask());
            assertThat(page.getRawText()).isEqualTo("ok");
        }
    });
}
Example #5
Source File: BlogHunterProcessor.java From blog-hunter with MIT License | 5 votes |
/** * 运行爬虫并返回结果 * * @return */ @Override public CopyOnWriteArrayList<VirtualArticle> execute() { List<String> errors = this.validateModel(config); if (CollectionUtils.isNotEmpty(errors)) { writer.print("校验不通过!请依据下方提示,检查输入参数是否正确......"); for (String error : errors) { writer.print(">> " + error); } return null; } CopyOnWriteArrayList<VirtualArticle> virtualArticles = new CopyOnWriteArrayList<>(); Hunter spider = Hunter.create(this, config, uuid); spider.addUrl(config.getEntryUrls().toArray(new String[0])) .setScheduler(new BlockingQueueScheduler(config)) .addPipeline((resultItems, task) -> this.process(resultItems, virtualArticles, spider)) .setDownloader(new HttpClientDownloader()) .thread(config.getThreadCount()); //设置抓取代理IP if (!CollectionUtils.isEmpty(config.getProxyList())) { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); SimpleProxyProvider provider = SimpleProxyProvider.from(config.getProxyList().toArray(new Proxy[0])); httpClientDownloader.setProxyProvider(provider); spider.setDownloader(httpClientDownloader); } // 测试代理 /*HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); SimpleProxyProvider provider = SimpleProxyProvider.from( new Proxy("61.135.217.7", 80) ); httpClientDownloader.setProxyProvider(provider); spider.setDownloader(httpClientDownloader);*/ // 启动爬虫 spider.run(); return virtualArticles; }
Example #6
Source File: HttpClientDownloader.java From blog-hunter with MIT License | 5 votes |
@Override public Page download(Request request, Task task) { if (task == null || task.getSite() == null) { throw new NullPointerException("task or site can not be null"); } CloseableHttpResponse httpResponse = null; CloseableHttpClient httpClient = getHttpClient(task.getSite()); Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy); Page page = Page.fail(); try { httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task); onSuccess(request); logger.debug("downloading page success {}", request.getUrl()); return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); onError(request); return page; } finally { if (httpResponse != null) { //ensure the connection is released back to pool EntityUtils.consumeQuietly(httpResponse.getEntity()); } if (proxyProvider != null && proxy != null) { proxyProvider.returnProxy(proxy, page, task); } } }
Example #7
Source File: HunterConfig.java From blog-hunter with MIT License | 5 votes |
/**
 * Parses a textual list of custom proxies and registers each entry through {@code addProxy}.
 *
 * <p>One proxy is expected per line, with fields separated by {@code |}:
 * <pre>
 * host|port
 * host|port|username|password
 * </pre>
 * Lines with any other number of fields are ignored. Nothing is parsed unless the proxy type
 * is {@code ProxyType.CUSTOM} and {@code proxyStr} is non-null.
 *
 * @param proxyStr the proxy definitions, one per line; may be {@code null}
 * @return this config, for chaining
 * @throws NumberFormatException if a port field is not a valid integer
 */
public HunterConfig setProxy(String proxyStr) {
    if (this.proxyType != ProxyType.CUSTOM || proxyStr == null) {
        return this;
    }
    // Accept both CRLF and bare LF line breaks.
    for (String line : proxyStr.split("\\r?\\n")) {
        // String.split takes a regex and a bare "|" is the alternation operator, which matches
        // the empty string and splits between every character; it must be escaped.
        String[] fields = line.split("\\|");
        if (fields.length == 2) {
            this.addProxy(new Proxy(fields[0], Integer.parseInt(fields[1].trim())));
        } else if (fields.length == 4) {
            this.addProxy(new Proxy(fields[0], Integer.parseInt(fields[1].trim()), fields[2], fields[3]));
        }
    }
    return this;
}
Example #8
Source File: HttpClientDownloader.java From plumemo with Apache License 2.0 | 5 votes |
@Override public Page download(Request request, Task task) { if (task == null || task.getSite() == null) { throw new NullPointerException("task or site can not be null"); } CloseableHttpResponse httpResponse = null; CloseableHttpClient httpClient = getHttpClient(task.getSite()); Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy); Page page = Page.fail(); try { httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task); onSuccess(request); logger.info("downloading page success {}", request.getUrl()); return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); onError(request); return page; } finally { if (httpResponse != null) { //ensure the connection is released back to pool EntityUtils.consumeQuietly(httpResponse.getEntity()); } if (proxyProvider != null && proxy != null) { proxyProvider.returnProxy(proxy, page, task); } } }
Example #9
Source File: HttpClientDownloader.java From webmagic with Apache License 2.0 | 5 votes |
@Override public Page download(Request request, Task task) { if (task == null || task.getSite() == null) { throw new NullPointerException("task or site can not be null"); } CloseableHttpResponse httpResponse = null; CloseableHttpClient httpClient = getHttpClient(task.getSite()); Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy); Page page = Page.fail(); try { httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task); onSuccess(request); logger.info("downloading page success {}", request.getUrl()); return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); onError(request); return page; } finally { if (httpResponse != null) { //ensure the connection is released back to pool EntityUtils.consumeQuietly(httpResponse.getEntity()); } if (proxyProvider != null && proxy != null) { proxyProvider.returnProxy(proxy, page, task); } } }
Example #10
Source File: HttpUriRequestConverter.java From webmagic with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link HttpUriRequest} for a request.
 *
 * <p>The request method and URL come from {@code request}. Site-level headers and the site's
 * timeout are applied when a site is given, the proxy is set on the request config when one is
 * given, and the request's own headers are added last on the built request.
 *
 * @param request the request supplying the method, URL and per-request headers
 * @param site    the site supplying default headers and timeouts; may be {@code null}, in
 *                which case no site headers are added and the request config keeps its defaults
 * @param proxy   the proxy to route through; may be {@code null}
 * @return the request ready to be executed
 */
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
    RequestBuilder requestBuilder = selectRequestMethod(request).setUri(UrlUtils.fixIllegalCharacterInUrl(request.getUrl()));
    RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
    // All site-dependent setup sits behind one null check; reading site.getHeaders() before
    // checking site would throw a NullPointerException for a null site.
    if (site != null) {
        if (site.getHeaders() != null) {
            for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
                requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
            }
        }
        requestConfigBuilder.setConnectionRequestTimeout(site.getTimeOut())
                .setSocketTimeout(site.getTimeOut())
                .setConnectTimeout(site.getTimeOut())
                .setCookieSpec(CookieSpecs.STANDARD);
    }
    if (proxy != null) {
        requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort(), proxy.getScheme()));
    }
    requestBuilder.setConfig(requestConfigBuilder.build());
    HttpUriRequest httpUriRequest = requestBuilder.build();
    if (request.getHeaders() != null && !request.getHeaders().isEmpty()) {
        for (Map.Entry<String, String> header : request.getHeaders().entrySet()) {
            httpUriRequest.addHeader(header.getKey(), header.getValue());
        }
    }
    return httpUriRequest;
}
Example #11
Source File: HunterConfig.java From blog-hunter with MIT License | 4 votes |
/**
 * Appends a proxy to {@code proxyList}.
 *
 * <p>The proxy is stored only when the proxy type is {@code ProxyType.CUSTOM} and the argument
 * is non-null. {@code setProxy} calls this method only in CUSTOM mode, so the guard has to let
 * CUSTOM through; rejecting CUSTOM here would make every such call a no-op.
 *
 * @param proxy the proxy to add; {@code null} is ignored
 */
private void addProxy(Proxy proxy) {
    if (this.proxyType != ProxyType.CUSTOM || null == proxy) {
        return;
    }
    proxyList.add(proxy);
}
Example #12
Source File: CrawlerDownloader.java From tom-crawler with Apache License 2.0 | 4 votes |
/**
 * Downloads the page for {@code request} with the HTTP client of the task's site.
 *
 * <p>A proxy is obtained from {@code proxyProvider} when one is configured and handed back in
 * the {@code finally} step together with the resulting page. An {@link IOException} is logged
 * with a message chosen by its type and reported through {@code onError}; the method then
 * returns the initial {@code Page.fail()} value.
 *
 * @param request the request to execute
 * @param task    the task supplying the site configuration
 * @return the handled page on success, otherwise the failed page
 * @throws NullPointerException if {@code task} or its site is {@code null}
 */
@Override
public Page download(Request request, Task task) {
    if (task == null || task.getSite() == null) {
        throw new NullPointerException("task or site can not be null");
    }

    CloseableHttpClient client = getHttpClient(task.getSite());
    Proxy proxy = null;
    if (proxyProvider != null) {
        proxy = proxyProvider.getProxy(task);
    }
    HttpClientRequestContext context = httpUriRequestConverter.convert(request, task.getSite(), proxy);

    CloseableHttpResponse response = null;
    Page page = Page.fail();
    try {
        response = client.execute(context.getHttpUriRequest(), context.getHttpClientContext());
        // The request's charset takes precedence over the site's.
        String charset = request.getCharset();
        if (charset == null) {
            charset = task.getSite().getCharset();
        }
        page = handleResponse(request, charset, response, task);
        onSuccess(request);
        logger.debug("downloading page success {}", request.getUrl());
    } catch (IOException e) {
        logDownloadFailure(request, e);
        onError(request);
    } finally {
        if (response != null) {
            // Ensure the connection is released back to the pool.
            EntityUtils.consumeQuietly(response.getEntity());
        }
        if (proxyProvider != null && proxy != null) {
            proxyProvider.returnProxy(proxy, page, task);
        }
    }
    return page;
}

/** Logs a download failure with a message selected by the exception's type. */
private void logDownloadFailure(Request request, IOException e) {
    if (e instanceof ConnectionClosedException) {
        logger.error("Premature end of chunk coded message body: {}", request.getUrl());
        return;
    }
    // SSLHandshakeException is a subtype of SSLException, so it has to be tested first.
    if (e instanceof SSLHandshakeException) {
        logger.error("Remote host closed connection during handshake: {}", request.getUrl());
        return;
    }
    if (e instanceof SSLException) {
        logger.error("SSL peer shut down incorrectly:[HttpClient] {}", request.getUrl());
        return;
    }
    if (e instanceof SocketTimeoutException) {
        logger.error("download page time out:{}", request.getUrl());
        return;
    }
    if (e instanceof NoHttpResponseException) {
        logger.error("failed to respond:{}", request.getUrl());
        return;
    }
    if (e instanceof HttpHostConnectException) {
        logger.error("Connect to proxy timed out:{}", request.getUrl());
        return;
    }
    if (e instanceof TruncatedChunkException) {
        logger.error("TruncatedChunkException:{}, msg:{}", request.getUrl(), e.getMessage());
        return;
    }
    logger.error("download page error:{} ", request.getUrl(), e);
}
Example #13
Source File: HttpUriRequestConverter.java From webmagic with Apache License 2.0 | 4 votes |
/**
 * Converts a request into the pair of objects needed to execute it: the HTTP request itself
 * and the accompanying client context.
 *
 * @param request the request to convert
 * @param site    the site passed through to both conversion steps
 * @param proxy   the proxy passed through to both conversion steps
 * @return a context holding the built HTTP request and client context
 */
public HttpClientRequestContext convert(Request request, Site site, Proxy proxy) {
    final HttpClientRequestContext converted = new HttpClientRequestContext();
    // Build the request first, then the context, both from the same inputs.
    converted.setHttpUriRequest(
            convertHttpUriRequest(request, site, proxy));
    converted.setHttpClientContext(
            convertHttpClientContext(request, site, proxy));
    return converted;
}