Java Code Examples for us.codecraft.webmagic.Task#getSite()
The following examples show how to use
us.codecraft.webmagic.Task#getSite().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CasperjsDownloader.java From spider with GNU General Public License v3.0 | 6 votes |
/**
 * Downloads the page for {@code request} by delegating to {@code casperjs.gatherHtml}.
 *
 * <p>On success the returned HTML is wrapped in a new {@link Page} carrying the raw text,
 * the request URL and the request itself, and {@code onSuccess} is invoked.
 *
 * <p>On failure, if a site is available and its cycle-retry count is positive, the result
 * of {@code addToCycleRetry} is returned. Otherwise the exception is stored on the request
 * under the {@code "EXCEPTION"} extra, {@code onError} is invoked and {@code null} is
 * returned.
 *
 * @param request the request to download
 * @param task    the owning task; may be {@code null}, in which case no site configuration
 *                is consulted and cycle retry is skipped
 * @return the downloaded page, the cycle-retry result, or {@code null} on failure
 */
@Override
public Page download(Request request, Task task) {
    // task is optional here, so site may legitimately stay null.
    Site site = (task != null) ? task.getSite() : null;

    String html;
    try {
        html = casperjs.gatherHtml(new com.gs.spider.model.commons.Request(request.getUrl(), true));
    } catch (Exception e) {
        // Without the null guard a missing task/site would raise a NullPointerException
        // from inside this handler and hide the original download failure.
        if (site != null && site.getCycleRetryTimes() > 0) {
            return addToCycleRetry(request, site);
        }
        request.putExtra("EXCEPTION", e);
        onError(request);
        return null;
    }

    Page page = new Page();
    page.setRawText(html);
    page.setUrl(new PlainText(request.getUrl()));
    page.setRequest(request);
    onSuccess(request);
    return page;
}
Example 2
Source File: HttpClientDownloader.java From blog-hunter with MIT License | 5 votes |
@Override public Page download(Request request, Task task) { if (task == null || task.getSite() == null) { throw new NullPointerException("task or site can not be null"); } CloseableHttpResponse httpResponse = null; CloseableHttpClient httpClient = getHttpClient(task.getSite()); Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy); Page page = Page.fail(); try { httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task); onSuccess(request); logger.debug("downloading page success {}", request.getUrl()); return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); onError(request); return page; } finally { if (httpResponse != null) { //ensure the connection is released back to pool EntityUtils.consumeQuietly(httpResponse.getEntity()); } if (proxyProvider != null && proxy != null) { proxyProvider.returnProxy(proxy, page, task); } } }
Example 3
Source File: HttpClientDownloader.java From plumemo with Apache License 2.0 | 5 votes |
@Override public Page download(Request request, Task task) { if (task == null || task.getSite() == null) { throw new NullPointerException("task or site can not be null"); } CloseableHttpResponse httpResponse = null; CloseableHttpClient httpClient = getHttpClient(task.getSite()); Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy); Page page = Page.fail(); try { httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task); onSuccess(request); logger.info("downloading page success {}", request.getUrl()); return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); onError(request); return page; } finally { if (httpResponse != null) { //ensure the connection is released back to pool EntityUtils.consumeQuietly(httpResponse.getEntity()); } if (proxyProvider != null && proxy != null) { proxyProvider.returnProxy(proxy, page, task); } } }
Example 4
Source File: HttpClientDownloader.java From webmagic with Apache License 2.0 | 5 votes |
@Override public Page download(Request request, Task task) { if (task == null || task.getSite() == null) { throw new NullPointerException("task or site can not be null"); } CloseableHttpResponse httpResponse = null; CloseableHttpClient httpClient = getHttpClient(task.getSite()); Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null; HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy); Page page = Page.fail(); try { httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext()); page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task); onSuccess(request); logger.info("downloading page success {}", request.getUrl()); return page; } catch (IOException e) { logger.warn("download page {} error", request.getUrl(), e); onError(request); return page; } finally { if (httpResponse != null) { //ensure the connection is released back to pool EntityUtils.consumeQuietly(httpResponse.getEntity()); } if (proxyProvider != null && proxy != null) { proxyProvider.returnProxy(proxy, page, task); } } }
Example 5
Source File: CrawlerDownloader.java From tom-crawler with Apache License 2.0 | 4 votes |
/**
 * Executes {@code request} with the HTTP client for the task's site and returns the
 * resulting {@link Page}.
 *
 * <p>The page starts as {@code Page.fail()} and is replaced when {@code handleResponse}
 * completes. Any {@link IOException} is logged with a message chosen by its concrete
 * type, after which {@code onError} is invoked and the failure page is returned.
 *
 * @param request the request to execute
 * @param task    the owning task; it and its site must be non-null
 * @return the handled page, or the {@code Page.fail()} instance after an I/O error
 * @throws NullPointerException if {@code task} or its site is {@code null}
 */
@Override
public Page download(Request request, Task task) {
    if (task == null) {
        throw new NullPointerException("task or site can not be null");
    }
    if (task.getSite() == null) {
        throw new NullPointerException("task or site can not be null");
    }

    CloseableHttpClient client = getHttpClient(task.getSite());

    Proxy proxy = null;
    if (proxyProvider != null) {
        proxy = proxyProvider.getProxy(task);
    }

    HttpClientRequestContext context = httpUriRequestConverter.convert(request, task.getSite(), proxy);
    Page result = Page.fail();
    CloseableHttpResponse response = null;
    try {
        response = client.execute(context.getHttpUriRequest(), context.getHttpClientContext());
        result = handleResponse(
                request,
                request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(),
                response,
                task);
        onSuccess(request);
        logger.debug("downloading page success {}", request.getUrl());
    } catch (IOException e) {
        logDownloadFailure(request, e);
        onError(request);
    } finally {
        if (response != null) {
            // consume the entity so the connection returns to the pool
            EntityUtils.consumeQuietly(response.getEntity());
        }
        if (proxyProvider != null && proxy != null) {
            proxyProvider.returnProxy(proxy, result, task);
        }
    }
    return result;
}

/**
 * Logs a download failure at error level, choosing the message by exception type.
 * The checks run most-specific first: {@code SSLHandshakeException} is tested before
 * its supertype {@code SSLException}. Only the fallback branch passes the exception
 * itself to the logger.
 */
private void logDownloadFailure(Request request, IOException e) {
    if (e instanceof ConnectionClosedException) {
        logger.error("Premature end of chunk coded message body: {}", request.getUrl());
        return;
    }
    if (e instanceof SSLHandshakeException) {
        logger.error("Remote host closed connection during handshake: {}", request.getUrl());
        return;
    }
    if (e instanceof SSLException) {
        logger.error("SSL peer shut down incorrectly:[HttpClient] {}", request.getUrl());
        return;
    }
    if (e instanceof SocketTimeoutException) {
        logger.error("download page time out:{}", request.getUrl());
        return;
    }
    if (e instanceof NoHttpResponseException) {
        logger.error("failed to respond:{}", request.getUrl());
        return;
    }
    if (e instanceof HttpHostConnectException) {
        logger.error("Connect to proxy timed out:{}", request.getUrl());
        return;
    }
    if (e instanceof TruncatedChunkException) {
        logger.error("TruncatedChunkException:{}, msg:{}", request.getUrl(), e.getMessage());
        return;
    }
    logger.error("download page error:{} ", request.getUrl(), e);
}