Java Code Examples for us.codecraft.webmagic.model.OOSpider#create()

The following examples show how to use us.codecraft.webmagic.model.OOSpider#create(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: BaiduBaike.java, from the webmagic project (Apache License 2.0); 6 votes.
/**
 * Demo entry point: creates an {@code OOSpider} for {@code BaiduBaike}, downloads a
 * single page, then downloads a batch of pages, printing every extracted object.
 * The spider is always closed, even if a download throws.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    OOSpider ooSpider = OOSpider.create(Site.me().setSleepTime(0), BaiduBaike.class);
    try {
        String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8";

        // single download (same URL as before, now built from the shared template)
        BaiduBaike baike = ooSpider.<BaiduBaike>get(String.format(urlTemplate, "httpclient"));
        System.out.println(baike);

        // multidownload
        List<String> list = new ArrayList<String>();
        list.add(String.format(urlTemplate,"风力发电"));
        list.add(String.format(urlTemplate,"太阳能"));
        list.add(String.format(urlTemplate,"地热发电"));
        // NOTE(review): the same keyword is queued twice; confirm whether the duplicate is intentional.
        list.add(String.format(urlTemplate,"地热发电"));
        List<BaiduBaike> resultItemses = ooSpider.<BaiduBaike>getAll(list);
        for (BaiduBaike resultItemse : resultItemses) {
            System.out.println(resultItemse);
        }
    } finally {
        // Release the spider even when get/getAll fails.
        ooSpider.close();
    }
}
 
Example 2
Source File: SpiderController.java, from the feiqu-opensource project (Apache License 2.0); 5 votes.
/**
 * Handler mapped to {@code v2exSpider}: builds an {@code OOSpider} for {@code V2exDTO}
 * backed by {@code topicInfoPipeline}, seeds it with the V2EX "jobs" tab URL and calls
 * {@code run()} on it.
 *
 * @return always {@code true} once {@code run()} has returned
 */
@RequestMapping("v2exSpider")
@ResponseBody
public Object v2exSpider(){
    // Choose one of the configured user-agent strings at random for this crawl.
    int agentIndex = new Random().nextInt(CommonConstant.userAgentArray.length);

    Site v2exSite = Site.me()
            .setUserAgent(CommonConstant.userAgentArray[agentIndex])
            .addHeader("Referer","https://www.v2ex.com/")
            .setSleepTime(5000)
            .setDomain("v2ex.com");

    OOSpider jobsSpider = OOSpider.create(v2exSite, topicInfoPipeline, V2exDTO.class);
    jobsSpider.addUrl("https://www.v2ex.com/?tab=jobs").run();

    return Boolean.TRUE;
}
 
Example 3
Source File: BaiduNews.java, from the webmagic project (Apache License 2.0); 5 votes.
/**
 * Demo entry point: creates an {@code OOSpider} for {@code BaiduNews}, downloads one
 * search-result page and prints the extracted object. The spider is always closed,
 * even if the download throws.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    OOSpider ooSpider = OOSpider.create(Site.me().setSleepTime(0), BaiduNews.class);
    try {
        // single download
        BaiduNews news = ooSpider.<BaiduNews>get("http://news.baidu.com/ns?tn=news&cl=2&rn=20&ct=1&fr=bks0000&ie=utf-8&word=httpclient");
        System.out.println(news);
    } finally {
        // Release the spider even when the download fails.
        ooSpider.close();
    }
}
 
Example 4
Source File: HotContentJob.java, from the feiqu-opensource project (Apache License 2.0); 4 votes.
/**
 * Scheduled job (cron {@code 0 3 *}{@code /6 * * ?}) that crawls the V2EX "hot" tab
 * with an {@code OOSpider} for {@code V2exDTO} backed by {@code topicInfoPipeline},
 * then logs how many seconds the crawl took.
 *
 * <p>Any {@link Exception} raised while crawling is logged and not rethrown. The
 * stopwatch is stopped on both the success and the failure path before its elapsed
 * time is read.
 */
@Scheduled(cron = "0 3 */6 * * ?")
public void spider(){
    Stopwatch stopwatch = Stopwatch.createStarted();
    try {
        OOSpider ooSpider = OOSpider.create(Site.me()
                        .setUserAgent(CommonConstant.userAgentArray[new Random().nextInt(CommonConstant.userAgentArray.length)])
                        .addHeader("Referer","https://www.v2ex.com/").setSleepTime(30000).setDomain("v2ex.com"),
                topicInfoPipeline, V2exDTO.class);
        ooSpider.addUrl("https://www.v2ex.com/?tab=hot")
                .run();
        // A ReadHub (api.readhub.cn) topic import was formerly sketched here as
        // commented-out code; it was disabled and has been dropped as dead code.
    } catch (Exception e) {
        logger.error("爬虫出错",e);
    } finally {
        // Stop timing whether or not the crawl succeeded.
        stopwatch.stop();
    }
    long seconds = stopwatch.elapsed(TimeUnit.SECONDS);
    logger.info("爬虫数据更新完毕,耗时{}秒",seconds);
}