Java Code Examples for org.jsoup.nodes.Element#nextElementSibling()
The following examples show how to use
org.jsoup.nodes.Element#nextElementSibling().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JsoupInstanceListExtractor.java From wandora with GNU General Public License v3.0 | 8 votes |
private void parseTopic(Element classElement, Topic typeTopic) throws TopicMapException { System.out.println(classElement.text()); Topic t = getOrCreateTopic(tm, null, classElement.text()); if(typeTopic == null) typeTopic = wandoraClass; t.addType(typeTopic); // See if the next element is a list (of instances) Element listWrapper = classElement.nextElementSibling(); if(listWrapper != null && !listWrapper.children().isEmpty()) { for(Element listCandidate: listWrapper.children()) { if(listCandidate.tagName().equals("ul")) { parseList(listCandidate, t); } } } }
Example 2
Source File: JsoupSuperSubClassListExtractor.java From wandora with GNU General Public License v3.0 | 8 votes |
private void parseTopic(Element classElement, Topic classTopic) throws TopicMapException { String name = classElement.text().trim(); if(name.length() == 0) return; Topic t = getOrCreateTopic(tm, null , name); if(classTopic == null) classTopic = wandoraClass; makeSubclassOf(tm, t, classTopic); // See if the next element is a list (of instances) Element listWrapper = classElement.nextElementSibling(); if(listWrapper != null && !listWrapper.children().isEmpty()) { for(Element listCandidate: listWrapper.children()){ if(listCandidate.tagName().equals("ul")) parseList(listCandidate, t); } } }
Example 3
Source File: Elements.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Gathers sibling elements of every element in this collection.
 *
 * @param query optional query; when non-null only siblings matching it are kept
 * @param next  {@code true} to walk following siblings, {@code false} for preceding ones
 * @param all   {@code true} to keep walking until siblings run out,
 *              {@code false} to look at the adjacent sibling only
 * @return the gathered siblings
 */
private Elements siblings(String query, boolean next, boolean all) {
    final Elements found = new Elements();
    final Evaluator filter = (query == null) ? null : QueryParser.parse(query);

    for (Element start : this) {
        Element cursor = start;
        while (true) {
            Element sibling = next ? cursor.nextElementSibling() : cursor.previousElementSibling();
            if (sibling == null) {
                break;
            }
            if (filter == null || sibling.is(filter)) {
                found.add(sibling);
            }
            cursor = sibling;
            if (!all) {
                break;
            }
        }
    }
    return found;
}
Example 4
Source File: BakaTsukiParserAlternative.java From coolreader with MIT License | 6 votes |
/*** * Look for <h3>after * <h2>containing the volume list. Treat each li in dl/ul/div as the chapters. * * @param novel * @param h2 * @return */ private static ArrayList<BookModel> parseBooksMethod1(NovelCollectionModel novel, Element h2, String language) { // Log.d(TAG, "method 1"); ArrayList<BookModel> books = new ArrayList<BookModel>(); Element bookElement = h2; boolean walkBook = true; int bookOrder = 0; do { bookElement = bookElement.nextElementSibling(); if (bookElement == null || bookElement.tagName() == "h2") walkBook = false; else if (bookElement.tagName() != "h3") { Elements h3s = bookElement.select("h3"); if (h3s != null && h3s.size() > 0) { for (Element h3 : h3s) { bookOrder = processH3(novel, books, h3, bookOrder, language); } } } else if (bookElement.tagName() == "h3") { bookOrder = processH3(novel, books, bookElement, bookOrder, language); } } while (walkBook); return books; }
Example 5
Source File: Elements.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Collects the siblings of each element held by this collection.
 *
 * @param query query used to filter the siblings, or {@code null} to keep all of them
 * @param next  direction: {@code true} for following siblings, {@code false} for preceding
 * @param all   {@code true} to continue through every sibling in that direction,
 *              {@code false} to consider just the immediate one
 * @return the collected siblings
 */
private Elements siblings(String query, boolean next, boolean all) {
    Elements result = new Elements();
    Evaluator matcher = (query != null) ? QueryParser.parse(query) : null;

    for (Element origin : this) {
        Element sibling = next ? origin.nextElementSibling() : origin.previousElementSibling();
        while (sibling != null) {
            boolean accepted = (matcher == null) || sibling.is(matcher);
            if (accepted) {
                result.add(sibling);
            }
            if (!all) {
                break;
            }
            sibling = next ? sibling.nextElementSibling() : sibling.previousElementSibling();
        }
    }
    return result;
}
Example 6
Source File: LoadMessagesTask.java From SteamGifts with MIT License | 6 votes |
private List<IEndlessAdaptable> loadMessages(Document document) { List<IEndlessAdaptable> list = new ArrayList<>(); Elements children = document.select(".comments__entity"); for (Element element : children) { Element link = element.select(".comments__entity__name a").first(); if (link != null) { MessageHeader message = new MessageHeader(link.text(), link.absUrl("href")); Element commentElement = element.nextElementSibling(); if (commentElement != null) Utils.loadComments(commentElement, message, Comment.Type.COMMENT); // add the message & all associated comments. list.add(message); list.addAll(message.getComments()); } } return list; }
Example 7
Source File: CifnewsPageHandler.java From cetty with Apache License 2.0 | 6 votes |
/**
 * Assembles the article body as a new {@code div} element.
 *
 * <p>The summary text is passed to {@code buildBlockquote} first. Then each direct
 * child of {@code div.article-inner} is handled: children containing
 * {@code div.fetch-present} are skipped; children containing a {@code p>img} are
 * replaced by a figure built from that image, captioned with the text of the
 * image's next sibling when that sibling is a {@code p}; all other children are
 * appended as they are.
 *
 * @param tempBody elements to extract the article from
 * @return the assembled body element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    String blockquote = tempBody.select("div.fetch-read>div.summary").text();
    buildBlockquote(blockquote, articleBody);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        Element image = child.select("p>img").first();
        if (image == null) {
            articleBody.appendChild(child);
            continue;
        }

        Element figure = buildFigure(image);
        // Looked up after the figure is built, as in the original ordering.
        Element afterImage = image.nextElementSibling();
        if (afterImage != null && afterImage.tagName().equals("p")) {
            figure.appendChild(buildFigcaption(afterImage.text()));
        }
        articleBody.appendChild(figure);
    }
    return articleBody;
}
Example 8
Source File: WikiCorpusTask.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
private static String extractFirstParagraphs(Document document) { Elements paragraphs = document.select("p"); StringBuilder sb = new StringBuilder(); for (Element p : paragraphs) { if (!p.parent().hasClass("mw-parser-output") || !p.children().isEmpty() && p.child(0).attr("style").equals("display:none")) { continue; } sb.append(p.text()).append("\n"); if (p.nextElementSibling() != null && (p.nextElementSibling().className().startsWith("toc") || !p.nextElementSibling().nodeName().equals("p") && !p.nextElementSibling().attr("style").equals("display:none"))) { break; } // // if (p.className().startsWith("toc")) { // break; // } } return sb.toString(); }
Example 9
Source File: RawWikiCorpusExtractor.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
static String extractFirstParagraphs(Document document) { Elements paragraphs = document.select("p"); StringBuilder sb = new StringBuilder(); for (Element p : paragraphs) { if (!p.parent().hasClass("mw-parser-output") || !p.children().isEmpty() && p.child(0).attr("style").equals("display:none")) { continue; } sb.append(p.text()).append("\n"); if ( p.nextElementSibling() != null && (p.nextElementSibling().className().startsWith("toc") || !p.nextElementSibling().nodeName().equals("p") && !p.nextElementSibling().attr("style").equals("display:none"))) { break; } // // if (p.className().startsWith("toc")) { // break; // } } return sb.toString(); }
Example 10
Source File: AxisSelector.java From CrawlerForReader with Apache License 2.0 | 5 votes |
/**
 * All sibling elements that come after the given node (following-sibling axis).
 *
 * @param e node whose following siblings are wanted
 * @return the following sibling elements in document order; empty when there are none
 */
public Elements followingSibling(Element e) {
    Elements siblings = new Elements();
    for (Element cursor = e.nextElementSibling(); cursor != null; cursor = cursor.nextElementSibling()) {
        siblings.add(cursor);
    }
    return siblings;
}
Example 11
Source File: CommonUtil.java From JsoupXpath with Apache License 2.0 | 5 votes |
/**
 * Collects every sibling element that follows {@code el}.
 *
 * @param el element whose following siblings are wanted
 * @return the following siblings in document order, or {@code null} when there are none
 */
public static Elements followingSibling(Element el) {
    Elements collected = new Elements();
    for (Element cursor = el.nextElementSibling(); cursor != null; cursor = cursor.nextElementSibling()) {
        collected.add(cursor);
    }
    // Callers receive null rather than an empty collection.
    return collected.isEmpty() ? null : collected;
}
Example 12
Source File: FollowingSiblingOneSelector.java From JsoupXpath with Apache License 2.0 | 5 votes |
/**
 * Maps each context element to its immediately following sibling element.
 * Elements without a following sibling contribute nothing.
 *
 * @param context elements to start from
 * @return an {@code XValue} wrapping the following siblings, in context order
 */
@Override
public XValue apply(Elements context) {
    Elements newContext = new Elements();
    for (Element el : context) {
        Element following = el.nextElementSibling();
        if (following != null) {
            newContext.add(following);
        }
    }
    return XValue.create(newContext);
}
Example 13
Source File: BakaTsukiParserAlternative.java From coolreader with MIT License | 5 votes |
public static int processH3(NovelCollectionModel novel, ArrayList<BookModel> books, Element bookElement, int bookOrder, String language) { // Log.d(TAG, "Found: " +bookElement.text()); BookModel book = new BookModel(); book.setTitle(CommonParser.sanitize(bookElement.text(), true)); book.setOrder(bookOrder); ArrayList<PageModel> chapterCollection = new ArrayList<PageModel>(); String parent = novel.getPage() + Constants.NOVEL_BOOK_DIVIDER + book.getTitle(); // parse the chapters. boolean walkChapter = true; int chapterOrder = 0; Element chapterElement = bookElement; do { chapterElement = chapterElement.nextElementSibling(); if (chapterElement == null || chapterElement.tagName() == "h2" || chapterElement.tagName() == "h3") { walkChapter = false; } else { Elements chapters = chapterElement.select("li"); for (Element chapter : chapters) { PageModel p = processLI(chapter, parent, chapterOrder, language); if (p != null) { chapterCollection.add(p); ++chapterOrder; } } } book.setChapterCollection(chapterCollection); } while (walkChapter); books.add(book); ++bookOrder; return bookOrder; }
Example 14
Source File: BakaTsukiParserAlternative.java From coolreader with MIT License | 5 votes |
/*** * Only have 1 book, chapter list is nested in ul/dl, e.g:Fate/Apocrypha, Gekkou * Parse the li as the chapters. * * @param novel * @param h2 * @return */ private static ArrayList<BookModel> parseBooksMethod3(NovelCollectionModel novel, Element h2, String language) { ArrayList<BookModel> books = new ArrayList<BookModel>(); Element bookElement = h2; boolean walkBook = true; int bookOrder = 0; do { bookElement = bookElement.nextElementSibling(); if (bookElement == null || bookElement.tagName() == "h2") walkBook = false; else if (bookElement.tagName() == "ul" || bookElement.tagName() == "dl") { // Log.d(TAG, "Found: " +bookElement.text()); BookModel book = new BookModel(); book.setTitle(CommonParser.sanitize(h2.text(), true)); book.setOrder(bookOrder); ArrayList<PageModel> chapterCollection = new ArrayList<PageModel>(); String parent = novel.getPage() + Constants.NOVEL_BOOK_DIVIDER + book.getTitle(); // parse the chapters. int chapterOrder = 0; Elements chapters = bookElement.select("li"); for (Element chapter : chapters) { PageModel p = processLI(chapter, parent, chapterOrder, language); if (p != null) { chapterCollection.add(p); ++chapterOrder; } } book.setChapterCollection(chapterCollection); books.add(book); ++bookOrder; } } while (walkBook); return books; }
Example 15
Source File: AxisSelector.java From CrawlerForReader with Apache License 2.0 | 5 votes |
/**
 * Returns the next sibling element (extension axis; syntax: following-sibling-one).
 *
 * @param e node whose next sibling is wanted
 * @return a collection holding the next sibling element, or an empty collection
 *         when there is none
 */
public Elements followingSiblingOne(Element e) {
    Elements result = new Elements();
    Element next = e.nextElementSibling();
    if (next != null) {
        result.add(next);
    }
    return result;
}
Example 16
Source File: ESchoolParser.java From substitution-schedule-parser with Mozilla Public License 2.0 | 4 votes |
private void parseTable(Element table, SubstitutionScheduleDay day) { for (Element th : table.select("th[colspan=10]")) { String lesson; Pattern pattern = Pattern.compile("(\\d+)\\. Stunde"); Matcher matcher = pattern.matcher(th.text()); if (matcher.find()) { lesson = matcher.group(1); } else { lesson = th.text(); } // skip over table headers Element row = th.parent().nextElementSibling().nextElementSibling(); while (row != null && row.select("th").size() == 0) { Substitution subst = new Substitution(); subst.setLesson(lesson); Elements columns = row.select("td"); String[] classes = columns.get(0).text().split(", |\\+"); subst.setClasses(new HashSet<>(Arrays.asList(classes))); subst.setPreviousTeacher(getPreviousValue(columns.get(1))); subst.setTeacher(getNewValue(columns.get(1))); subst.setPreviousSubject(getPreviousValue(columns.get(2))); subst.setSubject(getNewValue(columns.get(2))); subst.setPreviousRoom(getPreviousValue(columns.get(3))); subst.setRoom(getNewValue(columns.get(3))); if (columns.get(4).text().isEmpty()) { subst.setType("Vertretung"); subst.setColor(colorProvider.getColor("Vertretung")); } else { String desc = columns.get(4).text(); subst.setDesc(desc); String recognizedType = recognizeType(desc); if (recognizedType == null) recognizedType = "Vertretung"; subst.setType(recognizedType); subst.setColor(colorProvider.getColor(recognizedType)); } day.addSubstitution(subst); row = row.nextElementSibling(); } } }
Example 17
Source File: JiSho.java From ankihelper with GNU General Public License v3.0 | 4 votes |
public List<Definition> wordLookup(String key) { try { Document doc = Jsoup.connect(wordUrl + key) .userAgent("Mozilla") .timeout(5000) .get(); Elements entrys = doc.select("div.concept_light"); ArrayList<Definition> defList = new ArrayList<>(); if (entrys.size() > 0) { for (Element ele : entrys){ String furigana = ""; String writing = ""; String mp3_url = ""; //String meaning_tag = ""; //String definition = ""; Elements furigana_soup = ele.select("span.furigana"); if(furigana_soup.size() > 0){ furigana = furigana_soup.get(0).text().trim(); } Elements writing_soup = ele.select("span.text"); if(writing_soup.size() > 0){ writing = writing_soup.get(0).text().trim(); } Elements audio_soup = ele.select("audio > source"); if(audio_soup.size() > 0){ mp3_url = "[sound:" +audio_soup.get(0).attr("src") + "]"; } Elements meaning_tags_soup = ele.select("div.meaning-tags"); for(Element tag : meaning_tags_soup){ String meaning_tag = tag.text().trim(); Element word_def_soup = tag.nextElementSibling(); if(word_def_soup != null){ for(Element defSoup : word_def_soup.select("div.meaning-definition > span.meaning-meaning")){ HashMap<String, String> defMap = new HashMap<>(); String definition = "<i><font color='grey'>" + meaning_tag + "</font></i> " + defSoup.text().trim(); defMap.put(EXP_ELE[0], writing); defMap.put(EXP_ELE[1], furigana); defMap.put(EXP_ELE[2], mp3_url); //defMap.put(EXP_ELE[3], meaning_tag); defMap.put(EXP_ELE[3], definition); String audioIndicator = ""; if(!mp3_url.isEmpty()){ audioIndicator = "<font color='#227D51' >"+AUDIO_TAG + "</font>"; } String export_html = "<b>" + writing + "</b> <font color='grey'>" + furigana + "</font> " + audioIndicator + "<br/>" + definition; defList.add(new Definition(defMap, export_html)); } } } } } return defList; } catch (IOException ioe) { //Log.d("time out", Log.getStackTraceString(ioe)); Toast.makeText(MyApplication.getContext(), Log.getStackTraceString(ioe), Toast.LENGTH_SHORT).show(); return new ArrayList<Definition>(); } }
Example 18
Source File: JsoupDefinitionListExtractor.java From wandora with GNU General Public License v3.0 | 4 votes |
/**
 * Obtains a topic named after the text of {@code name}, types it with
 * {@code definitionType}, declares it a child of {@code listTopic}, and then sets
 * the text of each directly following {@code dd} element as its data.
 *
 * @param name      element whose text names the topic
 * @param listTopic topic the new topic is declared a child of
 * @throws TopicMapException propagated from the topic-map helpers called here
 */
private void parseName(Element name, Topic listTopic) throws TopicMapException {
    Topic nameTopic = getOrCreateTopic(tm, null, name.text());
    nameTopic.addType(definitionType);
    declareChild(listTopic, nameTopic);

    // Consume the run of dd elements that immediately follows the name element.
    for (Element definition = name.nextElementSibling();
            definition != null && definition.tagName().equals("dd");
            definition = definition.nextElementSibling()) {
        nameTopic.setData(definitionType, langTopic, definition.text());
    }
}