Java Code Examples for us.codecraft.webmagic.Spider#get()

The following examples show how to use us.codecraft.webmagic.Spider#get(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: BaiduBaikePageProcessor.java    From webmagic with Apache License 2.0 6 votes vote down vote up
/**
 * Demo entry point: fetches one Baidu Baike search page synchronously with
 * {@code Spider#get}, then a batch of pages with {@code Spider#getAll}, and
 * prints the extracted results to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    Spider spider = Spider.create(new BaiduBaikePageProcessor()).thread(2);
    // Release the spider even if a download or extraction throws; previously
    // close() was only reached on the happy path.
    try {
        String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8";

        //single download
        ResultItems resultItems = spider.<ResultItems>get(String.format(urlTemplate, "水力发电"));
        System.out.println(resultItems);

        //multidownload
        List<String> list = new ArrayList<String>();
        list.add(String.format(urlTemplate,"风力发电"));
        list.add(String.format(urlTemplate,"太阳能"));
        list.add(String.format(urlTemplate,"地热发电"));
        // NOTE(review): the same URL is added twice; kept as in the original --
        // confirm whether this is meant to exercise duplicate handling.
        list.add(String.format(urlTemplate,"地热发电"));
        List<ResultItems> resultItemses = spider.<ResultItems>getAll(list);
        for (ResultItems items : resultItemses) {
            System.out.println(items.getAll());
        }
    } finally {
        spider.close();
    }
}
 
Example 2
Source File: PostsServiceImpl.java    From plumemo with Apache License 2.0 5 votes vote down vote up
/**
 * Crawls the article at {@code postsVO.getSourceUri()} using the page model
 * registered for the post's platform type, then copies the extracted title
 * and the converted content back onto {@code postsVO}.
 *
 * <p>An unsupported platform type, or a crawl that yields no result, is
 * reported through {@code ExceptionUtil.rollback(ErrorEnum.PARAM_ERROR)}.
 *
 * @param postsVO the post to populate; its platform type and source URI are
 *                read, its title and content are written
 */
private void crawler(PostsVO postsVO) {
    Class<?> platformClass = PlatformEnum.getEnumTypeMap().get(postsVO.getPlatformType()).getPlatformClass();
    Spider spider = OOSpider.create(Site.me(), platformClass).setDownloader(new HttpClientDownloader());
    Object object;
    try {
        object = spider.get(postsVO.getSourceUri());
    } finally {
        // The spider is single-use here; close it so it is released even
        // when the crawl throws.
        spider.close();
    }

    if (object == null) {
        // Nothing was extracted from the source URI; fail through the same
        // error path as an invalid platform instead of hitting an NPE below.
        ExceptionUtil.rollback(ErrorEnum.PARAM_ERROR);
        // Reached only if rollback(...) returns normally -- there is nothing to copy.
        return;
    }

    String join = "";
    if (postsVO.getPlatformType().equals(PlatformEnum.JIAN_SHU.getType())) {
        JianShuVO jianShuVO = (JianShuVO) object;
        postsVO.setTitle(jianShuVO.getTitle());
        join = String.join("", jianShuVO.getContent());
    } else if (postsVO.getPlatformType().equals(PlatformEnum.JUE_JIN.getType())) {
        JueJinVO jueJinVO = (JueJinVO) object;
        postsVO.setTitle(jueJinVO.getTitle());
        join = String.join("", jueJinVO.getContent());
    } else if (postsVO.getPlatformType().equals(PlatformEnum.SEGMENT_FAULT.getType())) {
        SegmentFaultVO segmentFaultVO = (SegmentFaultVO) object;
        postsVO.setTitle(segmentFaultVO.getTitle());
        join = String.join("", segmentFaultVO.getContent());
    } else if (postsVO.getPlatformType().equals(PlatformEnum.CSDN.getType())) {
        CSDNVO csdnVO = (CSDNVO) object;
        postsVO.setTitle(csdnVO.getTitle());
        join = String.join("", csdnVO.getContent());
    } else if (postsVO.getPlatformType().equals(PlatformEnum.CN_BLOGS.getType())) {
        CNBlogsVO cnBlogsVO = (CNBlogsVO) object;
        postsVO.setTitle(cnBlogsVO.getTitle());
        join = String.join("", cnBlogsVO.getContent());
    } else {
        ExceptionUtil.rollback(ErrorEnum.PARAM_ERROR);
    }
    // Joined content pieces are passed through Remark's fragment converter
    // (presumably HTML to Markdown -- confirm against the Remark version in use).
    String converted = new Remark().convertFragment(join);
    postsVO.setContent(converted);
}