Java Code Examples for us.codecraft.webmagic.model.OOSpider#create()
The following examples show how to use
us.codecraft.webmagic.model.OOSpider#create().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaiduBaike.java From webmagic with Apache License 2.0 | 6 votes |
public static void main(String[] args) { OOSpider ooSpider = OOSpider.create(Site.me().setSleepTime(0), BaiduBaike.class); //single download String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8"; BaiduBaike baike = ooSpider.<BaiduBaike>get("http://baike.baidu.com/search/word?word=httpclient&pic=1&sug=1&enc=utf8"); System.out.println(baike); //multidownload List<String> list = new ArrayList<String>(); list.add(String.format(urlTemplate,"风力发电")); list.add(String.format(urlTemplate,"太阳能")); list.add(String.format(urlTemplate,"地热发电")); list.add(String.format(urlTemplate,"地热发电")); List<BaiduBaike> resultItemses = ooSpider.<BaiduBaike>getAll(list); for (BaiduBaike resultItemse : resultItemses) { System.out.println(resultItemse); } ooSpider.close(); }
Example 2
Source File: SpiderController.java From feiqu-opensource with Apache License 2.0 | 5 votes |
/**
 * Handler mapped to {@code v2exSpider}: crawls the V2EX "jobs" tab and hands
 * the extracted {@code V2exDTO} objects to {@code topicInfoPipeline}.
 *
 * <p>A user agent is picked at random from
 * {@code CommonConstant.userAgentArray} for each invocation. The call to
 * {@code run()} is made on the calling thread.
 *
 * @return always {@code true}
 */
@RequestMapping("v2exSpider")
@ResponseBody
public Object v2exSpider() {
    // Choose one of the configured user agents at random.
    int agentIndex = new Random().nextInt(CommonConstant.userAgentArray.length);

    Site v2exSite = Site.me()
            .setUserAgent(CommonConstant.userAgentArray[agentIndex])
            .addHeader("Referer", "https://www.v2ex.com/")
            .setSleepTime(5000)
            .setDomain("v2ex.com");

    OOSpider crawler = OOSpider.create(v2exSite, topicInfoPipeline, V2exDTO.class);
    crawler.addUrl("https://www.v2ex.com/?tab=jobs").run();

    return true;
}
Example 3
Source File: BaiduNews.java From webmagic with Apache License 2.0 | 5 votes |
public static void main(String[] args) { OOSpider ooSpider = OOSpider.create(Site.me().setSleepTime(0), BaiduNews.class); //single download BaiduNews baike = ooSpider.<BaiduNews>get("http://news.baidu.com/ns?tn=news&cl=2&rn=20&ct=1&fr=bks0000&ie=utf-8&word=httpclient"); System.out.println(baike); ooSpider.close(); }
Example 4
Source File: HotContentJob.java From feiqu-opensource with Apache License 2.0 | 4 votes |
/**
 * Scheduled job (cron {@code 0 3 *}{@code /6 * * ?}) that crawls the V2EX
 * "hot" tab and hands the extracted {@code V2exDTO} objects to
 * {@code topicInfoPipeline}.
 *
 * <p>A user agent is picked at random from
 * {@code CommonConstant.userAgentArray} for each run. Any exception raised by
 * the crawl is logged and swallowed so that it does not propagate out of the
 * job. The completion message with the elapsed seconds is logged only when
 * the crawl finished without throwing; previously it was also logged after a
 * failure.
 *
 * <p>NOTE: a commented-out ReadHub topic import (api.readhub.cn) that used to
 * sit inside this method has been removed as dead code.
 */
@Scheduled(cron = "0 3 */6 * * ?")
public void spider() {
    Stopwatch stopwatch = Stopwatch.createStarted();
    try {
        OOSpider ooSpider = OOSpider.create(Site.me()
                .setUserAgent(CommonConstant.userAgentArray[new Random().nextInt(CommonConstant.userAgentArray.length)])
                .addHeader("Referer", "https://www.v2ex.com/")
                .setSleepTime(30000)
                .setDomain("v2ex.com"), topicInfoPipeline, V2exDTO.class);
        ooSpider.addUrl("https://www.v2ex.com/?tab=hot")
                .run();
        stopwatch.stop();
    } catch (Exception e) {
        logger.error("爬虫出错", e);
        // Do not report a successful update after a failed crawl.
        return;
    }
    long seconds = stopwatch.elapsed(TimeUnit.SECONDS);
    logger.info("爬虫数据更新完毕,耗时{}秒", seconds);
}