Python bs4.element.Tag() Examples

The following are 28 code examples of bs4.element.Tag, drawn from open-source projects. The source file and project for each example are noted above it. You may also want to check out the other functions and classes available in the bs4.element module.
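Nearly every example below follows the same core pattern: parse some markup with BeautifulSoup, then use isinstance checks against Tag to separate element nodes from the NavigableString and Comment nodes that share the parse tree. A minimal sketch of that pattern (the markup and variable names here are illustrative, not taken from any of the projects below):

from bs4 import BeautifulSoup
from bs4.element import Tag

soup = BeautifulSoup('<ul><li>one</li><!-- note --><li>two</li></ul>', 'html.parser')

# .children yields Tags, NavigableStrings, and Comments alike;
# isinstance(..., Tag) keeps only real elements.
items = [child for child in soup.ul.children if isinstance(child, Tag)]
print([item.get_text() for item in items])  # ['one', 'two']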
Example #1
Source File: scrape_announcements.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def parse_build_notes(h: Tag) -> None:
    entries = []
    for n in h.next_elements:
        if isinstance(n, Tag) and n.name == 'p':
            if 'posted-in' in n.attrs.get('class', []):
                break
            if n.text:
                entries.append(n.text)

    embed = {
        'title': 'MTGO Build Notes',
        'type': 'rich',
        'description': '\n'.join(entries),
        'url': fetcher.find_announcements()[0],
    }
    if configuration.get_optional_str('bugs_webhook_id') is not None:
        fetch_tools.post_discord_webhook(
            configuration.get_str('bugs_webhook_id'),
            configuration.get_str('bugs_webhook_token'),
            embeds=[embed],
            username='Magic Online Announcements',
            avatar_url='https://magic.wizards.com/sites/mtg/files/styles/auth_small/public/images/person/wizards_authorpic_larger.jpg'
            ) 
Example #2
Source File: scrape_bugblog.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def parse_changelog(collapsible_block: Tag) -> None:
    # They never show fixed bugs in the Bug Blog anymore. Fixed bugs are now listed in the Build Notes section of the MTGO weekly announcements.
    # This is frustrating.
    for added in collapsible_block.find_all('ul'):
        for item in added.find_all('li'):
            print(item)
            bbt = strings.remove_smartquotes(item.get_text())

            issue = find_issue_by_code(bbt)
            if issue is not None:
                if not repo.is_issue_from_bug_blog(issue):
                    print('Adding Bug Blog to labels')
                    issue.add_to_labels('From Bug Blog')
            elif find_issue_by_name(bbt):
                print('Already exists.')
            else:
                print('Creating new issue')
                text = 'From Bug Blog.\nBug Blog Text: {0}'.format(bbt)
                repo.get_repo().create_issue(bbt, body=strings.remove_smartquotes(text), labels=['From Bug Blog']) 
Example #3
Source File: decrypt.py    From Anti-Spider with MIT License
def decrypt_woff_tag(tag, TTGlyphs, d_list):
    contents = tag.contents
    parts = []
    while contents:
        i = contents.pop(0)
        if isinstance(i, Tag):
            if i.name in decrypt_tags:
                # Decode the obfuscated glyph text and map it back to its
                # plain-text replacement via the glyph table.
                text = dec(i.text)
                for index, name in enumerate(TTGlyphs):
                    if text in name:
                        i = d_list[index]
            else:
                continue
        if not isinstance(i, str):
            continue
        parts.append(i)
    return ''.join(parts) 
Example #4
Source File: scrape_bugblog.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def check_if_removed_from_bugblog(bbt: Match, b: Tag, issue: Issue) -> None:
    if bbt is not None:
        text = strings.remove_smartquotes(bbt.group(1).strip())
        for row in b.find_all('tr'):
            data = row.find_all('td')
            rowtext = strings.remove_smartquotes(data[1].text.strip())
            if rowtext == text:
                break
            if strip_squarebrackets(rowtext) == strip_squarebrackets(text):
                # Fix this
                print("Issue #{id}'s bug blog text has differing autocard notation.".format(id=issue.number))
                old_bbt = strings.get_body_field(issue.body, 'Bug Blog Text')
                body = re.sub(BBT_REGEX, 'Bug Blog Text: {0}'.format(rowtext), issue.body, flags=re.MULTILINE)
                new_bbt = strings.get_body_field(body, 'Bug Blog Text')
                issue.edit(body=body)
                print('Updated to `{0}`'.format(rowtext))
                issue.create_comment(f'Changed bug blog text from `{old_bbt}` to `{new_bbt}`')
                break
        else:
            print('{id} is fixed!'.format(id=issue.number))
            repo.create_comment(issue, 'This bug has been removed from the bug blog!')
            issue.edit(state='closed') 
Example #5
Source File: deeru_html.py    From DeerU with GNU General Public License v3.0
@classmethod
def get_tag_from_bs(cls, soup):
        from bs4 import BeautifulSoup as bs
        from bs4.element import Tag as bs_tag
        father = None
        if isinstance(soup, bs):
            father = soup.find()
        elif isinstance(soup, bs_tag):
            father = soup
        if not father or not father.name:
            return None

        tag = cls(father.name, father.text, father.attrs)

        for c in father.children:
            c_tag = cls.get_tag_from_bs(c)
            tag.append(c_tag)
        return tag 
Example #6
Source File: helpers.py    From robobrowser with BSD 3-Clause "New" or "Revised" License
def ensure_soup(value, parser=None):
    """Coerce a value (or list of values) to Tag (or list of Tag).

    :param value: String, BeautifulSoup, Tag, or list of the above
    :param str parser: Parser to use; defaults to BeautifulSoup default
    :return: Tag or list of Tags

    """
    if isinstance(value, BeautifulSoup):
        return value.find()
    if isinstance(value, Tag):
        return value
    if isinstance(value, list):
        return [
            ensure_soup(item, parser=parser)
            for item in value
        ]
    parsed = BeautifulSoup(value, features=parser)
    return parsed.find() 
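A quick usage sketch, not taken from the robobrowser sources, showing the three coercions ensure_soup performs (it assumes the helper and its imports, BeautifulSoup and Tag, are already in scope):

soup = BeautifulSoup('<div><p>hello</p></div>', 'html.parser')
p = soup.find('p')
print(ensure_soup(soup).name)  # 'div' -- a document is reduced to its first tag
print(ensure_soup(p) is p)     # True  -- a Tag passes through unchanged
print([t.name for t in ensure_soup([soup, p])])  # ['div', 'p'] -- lists are coerced element-wise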
Example #7
Source File: fetcher.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def parse_article_item_extended(a: Tag) -> Tuple[Tag, str]:
    title = a.find_all('h3')[0]
    link = 'http://magic.wizards.com' + a.find_all('a')[0]['href']
    return (title, link) 
Example #8
Source File: Junos.py    From assimilator with MIT License
def get(self):
		if not self.dev.connected:
			logger.error("{0}: Firewall timed out or incorrect device credentials.".format(self.firewall_config['name']))
			return {'error' : 'Could not connect to device.'}, 504
		else:
			logger.info("{0}: Connected successfully.".format(self.firewall_config['name']))
		try:
			rpc = etree.tostring(self.dev.rpc.get_security_policies_hit_count(), encoding='unicode')
		except Exception as e:
			logger.error("Error parsing rpc: {0}".format(str(e)))
			return {'error' : 'Error parsing rpc.'}, 500
		finally:
			self.dev.close()
		soup = BS(rpc,'xml')
		entries = list()
		for hitcount in soup.find('policy-hit-count').children:
			if type(hitcount) != Tag or hitcount.name != 'policy-hit-count-entry':
				continue
			aux = {
				'count' : int(hitcount.find('policy-hit-count-count').text),
				'from' : hitcount.find('policy-hit-count-from-zone').text,
				'to' : hitcount.find('policy-hit-count-to-zone').text,
				'policy' : hitcount.find('policy-hit-count-policy-name').text
			}
			entries.append(aux)
		return {'len' : len(entries), 'hitcount' : entries} 
Example #9
Source File: Junos.py    From assimilator with MIT License
def get(self):
		if not self.dev.connected:
			logger.error("{0}: Firewall timed out or incorrect device credentials.".format(self.firewall_config['name']))
			return {'error' : 'Could not connect to device.'}, 504
		else:
			logger.info("{0}: Connected successfully.".format(self.firewall_config['name']))
		rpc = etree.tostring(self.dev.rpc.get_commit_information(), encoding='unicode')
		soup = BS(rpc,'xml')
		entries = list()
		logger.debug("soup: {0}".format(str(soup)))
		for entry in soup.find('commit-information').children:
			if type(entry) != Tag:
				continue
			entries.append({
				'user' : entry.user.text,
				'sequence' : entry.find('sequence-number').text,
				'date' : entry.find('date-time').text,
				'comment' : entry.log.text if entry.log else None
			})
		return {'len' : len(entries), 'commit' : entries} 
Example #10
Source File: helpers.py    From NBAsh with MIT License
def GetTextOfItem(item, default_value=''):
    if item is not None and isinstance(item, element.Tag):
        return item.get_text()
    else:
        return default_value 
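A tiny demonstration of the fallback behavior (illustrative only; it assumes BeautifulSoup and element are imported as in the project):

soup = BeautifulSoup('<b>42</b>', 'html.parser')
print(GetTextOfItem(soup.b))       # '42'
print(GetTextOfItem(None, 'n/a'))  # 'n/a' -- the default is returned for missing items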
Example #11
Source File: filter_callouts.py    From dactyl with MIT License
def filter_soup(soup, currentpage={}, config={}, **kwargs):
    """
    Find patterns that look like callouts, for example **Note:**, and add
    callout classes to their parent elements (usually <p>)
    """
    # callout classes are defined by page>target>config>default
    callout_classes = currentpage.get(CALLOUT_TYPES_FIELD,
                        config.get(CALLOUT_TYPES_FIELD,
                        DEFAULT_CALLOUT_TYPES))
    callout_intro = re.compile(r"("+"|".join(callout_classes)+"):?$", re.I)
    callout_base_class = currentpage.get(CALLOUT_CLASS_FIELD,
                        config.get(CALLOUT_CLASS_FIELD,
                        DEFAULT_CALLOUT_CLASS))

    callouts = soup.find_all(name=["strong","em"], string=callout_intro)
    for c in callouts:
        if not c.previous_sibling:  # This callout starts a block
            callout_type = c.string.replace(":","").lower()
            if callout_type in callout_classes:
                if (c.parent.parent.name == "blockquote" and Tag not in
                    [type(u) for u in c.parent.previous_siblings]):
                    # Special case for blockquotes, to allow multiline callouts.
                    # First element of BQ must start with a callout keyword
                    callout_el = c.parent.parent
                else:
                    callout_el = c.parent
                callout_el["class"] = [callout_base_class, callout_type] 
Example #12
Source File: test_feedback.py    From notifications-admin with MIT License
def test_email_address_required_for_problems_and_questions(
    client_request,
    mocker,
    data,
    ticket_type,
):
    mocker.patch('app.main.views.feedback.zendesk_client')
    client_request.logout()
    page = client_request.post(
        'main.feedback',
        ticket_type=ticket_type,
        _data=data,
        _expected_status=200
    )
    assert isinstance(page.find('span', {'class': 'error-message'}), element.Tag) 
Example #13
Source File: tbentries.py    From open-context-py with GNU General Public License v3.0
def check_fix_photobox(self, node):
        """ converts photobox tags with no visible images into spans """
        if isinstance(node, Tag):
            if node.get('class') is not None:
                node_classes = node.get('class')
                if node_classes[0] == self.pc_photobox_class:
                    # print('Check: '+ node_clases[0])
                    make_span = False
                    show_img = False
                    img_nodes = node.find_all(['img'])
                    if len(img_nodes) < 1:
                        make_span = True
                    else:
                        for img in img_nodes:
                            # print('img style: ' + str(img.get('style')))
                            if img.get('style') is None:
                                show_img = True
                            else:
                                img_styles = img.get('style')
                                if 'display:none;' in img_styles:
                                    # print('ok img..')
                                    pass
                                else:
                                    # print('crap!')
                                    show_img = True
                        if show_img is False:
                            # print('img ok to span transform')
                            make_span = True
                    if make_span:
                        # print('Span transform')
                        node.name = 'span' 
Example #14
Source File: babelnovel.py    From lightnovel-crawler with Apache License 2.0
def download_chapter_body(self, chapter):
        logger.info('Visiting %s', chapter['json_url'])
        data = self.get_json(chapter['json_url'])

        soup = BeautifulSoup(data['data']['content'], 'lxml')
        if self.bad_selectors:
            for tag in soup.select(self.bad_selectors):
                tag.extract()
            # end for
        # end if

        body = soup.find('body')
        self.clean_contents(body)

        for tag in body.contents:
            if not str(tag).strip():
                tag.extract()
            elif isinstance(tag, Tag):
                tag.name = 'p'
            # end if
        # end for

        # body = data['data']['content']
        result = str(body)
        result = re.sub(r'\n\n', '<br><br>', result)
        return result
    # end def
# end class 
Example #15
Source File: helpers.py    From robobrowser with BSD 3-Clause "New" or "Revised" License
def lowercase_attr_names(tag):
    """Lower-case all attribute names of the provided BeautifulSoup tag.
    Note: this mutates the tag's attribute names and does not return a new
    tag.

    :param tag: BeautifulSoup tag

    """
    # Use list comprehension instead of dict comprehension for 2.6 support
    tag.attrs = dict([
        (key.lower(), value)
        for key, value in iteritems(tag.attrs)
    ]) 
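A small demonstration (assuming the helper's own import of iteritems, e.g. from six, is available; HTML parsers usually lowercase attribute names on their own, so the attrs are set by hand here):

from bs4 import BeautifulSoup

soup = BeautifulSoup('', 'html.parser')
tag = soup.new_tag('div')
tag.attrs = {'DATA-ID': '1', 'Class': 'box'}
lowercase_attr_names(tag)
print(sorted(tag.attrs))  # ['class', 'data-id'] -- the tag was mutated in place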
Example #16
Source File: visual_linker.py    From fonduer with MIT License
def _coordinates_from_HTML(
        self, page: Tag, page_num: int
    ) -> Tuple[
        List[PdfWord], Dict[PdfWordId, Bbox],
    ]:
        pdf_word_list: List[PdfWord] = []
        coordinate_map: Dict[PdfWordId, Bbox] = {}
        block_coordinates: Dict[PdfWordId, Tuple[int, int]] = {}
        blocks = page.find_all("block")
        i = 0  # counter for word_id in page_num
        for block in blocks:
            x_min_block = int(float(block.get("xmin")))
            y_min_block = int(float(block.get("ymin")))
            lines = block.find_all("line")
            for line in lines:
                y_min_line = int(float(line.get("ymin")))
                y_max_line = int(float(line.get("ymax")))
                words = line.find_all("word")
                for word in words:
                    xmin = int(float(word.get("xmin")))
                    xmax = int(float(word.get("xmax")))
                    for content in self.separators.split(word.getText()):
                        if len(content) > 0:  # Ignore empty characters
                            word_id: PdfWordId = (page_num, i)
                            pdf_word_list.append((word_id, content))
                            coordinate_map[word_id] = Bbox(
                                page_num, y_min_line, y_max_line, xmin, xmax,
                            )
                            block_coordinates[word_id] = (y_min_block, x_min_block)
                            i += 1
        # sort pdf_word_list by block (top, left), then by word (top, left) within each block
        pdf_word_list = sorted(
            pdf_word_list,
            key=lambda word_id__: block_coordinates[word_id__[0]]
            + (coordinate_map[word_id__[0]].top, coordinate_map[word_id__[0]].left),
        )
        return pdf_word_list, coordinate_map 
Example #17
Source File: scrape_bugblog.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def find_bbt_in_issue_title(issue: Issue, known_issues: Tag) -> None:
    title = strip_squarebrackets(issue.title).replace(' ', '')
    for row in known_issues.find_all('tr'):
        data = row.find_all('td')
        row_text = strip_squarebrackets(data[1].text.strip()).replace(' ', '')
        if row_text == title:
            body = issue.body
            body += '\nBug Blog Text: {0}'.format(data[1].text.strip())
            if body != issue.body:
                issue.edit(body=body)
            return 
Example #18
Source File: scrape_bugblog.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def check_for_missing_bugs(b: Tag) -> None:
    for row in b.find_all('tr'):
        data = row.find_all('td')
        row_text = data[1].text.strip()
        if row_text == 'Description':
            # BS4 is bad.
            continue
        issue = find_issue_by_code(row_text)
        if issue:
            labels = [c.name for c in issue.labels]
            categories = [c for c in labels if c in strings.METACATS]
            if categories:
                continue
            bbcat = re.match(strings.REGEX_BBCAT, data[2].text.strip())
            if bbcat is None:
                continue
            g1 = bbcat.group(1).strip()
            if g1 in strings.METACATS:
                issue.add_to_labels(g1)
                continue
            if bbcat.group(2) is not None:
                g2 = bbcat.group(2).strip()
                if g2 in strings.METACATS:
                    issue.add_to_labels(g2)
                    continue
            print(f'Unknown BBCat: {bbcat.group(0)}')
            continue
        print('Could not find issue for `{row}`'.format(row=row_text))
        text = 'From Bug Blog.\nBug Blog Text: {0}'.format(row_text)
        repo.get_repo().create_issue(strings.remove_smartquotes(row_text), body=strings.remove_smartquotes(text), labels=['From Bug Blog']) 
Example #19
Source File: scrape_announcements.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def parse_downtimes(h: Tag) -> None:
    for n in h.next_elements:
        if isinstance(n, Tag) and n.text:
            with open('downtimes.txt', 'w', encoding='utf-8') as f:
                txt = n.text.strip()
                txt = txt.replace("Please note that there are no more 'extended' or 'normal' downtimes; in the new world with fewer downtimes, they're all the same length of time.", '')
                print(txt)
                f.write(txt)
            break 
Example #20
Source File: scrape_announcements.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def parse_header(h: Tag) -> None:
    txt = h.text
    if txt.startswith('Downtime'):
        parse_downtimes(h)
    elif txt.startswith('Build Notes'):
        parse_build_notes(h) 
Example #21
Source File: fetcher.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def get_article_archive() -> List[Tuple[Tag, str]]:
    try:
        html = fetch_tools.fetch('http://magic.wizards.com/en/articles/archive/184956')
    except fetch_tools.FetchException:
        html = fetch_tools.fetch('http://magic.wizards.com/en/articles/archive/')
    soup = BeautifulSoup(html, 'html.parser')
    return [parse_article_item_extended(a) for a in soup.find_all('div', class_='article-item-extended')] 
Example #22
Source File: doc.py    From bot with MIT License
def _match_end_tag(tag: Tag) -> bool:
        """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
        for attr in SEARCH_END_TAG_ATTRS:
            if attr in tag.get("class", ()):
                return True

        return tag.name == "table" 
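An illustrative check. SEARCH_END_TAG_ATTRS is defined elsewhere in the bot's doc module; a value is assumed here purely for demonstration:

from bs4 import BeautifulSoup

SEARCH_END_TAG_ATTRS = ('section', 'footer')  # assumed value, not the module's real constant
soup = BeautifulSoup('<div class="section"></div><table></table>', 'html.parser')
print(_match_end_tag(soup.div))    # True -- class matches a search-end attribute
print(_match_end_tag(soup.table))  # True -- tables always end the search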
Example #23
Source File: xss_utils.py    From ITWSV with MIT License
def study(bs_node, parent=None, keyword=""):
    entries = []

    # if parent is None:
    #  print("Keyword is: {0}".format(keyword))
    if keyword in str(bs_node).lower():
        if isinstance(bs_node, element.Tag):
            if keyword in str(bs_node.attrs):

                for k, v in bs_node.attrs.items():
                    if keyword in v:
                        # print("Found in attribute value {0} of tag {1}".format(k, bs_node.name))
                        noscript = close_noscript(bs_node)
                        d = {"type": "attrval", "name": k, "tag": bs_node.name, "noscript": noscript}
                        if d not in entries:
                            entries.append(d)

                    if keyword in k:
                        # print("Found in attribute name {0} of tag {1}".format(k, bs_node.name))
                        noscript = close_noscript(bs_node)
                        d = {"type": "attrname", "name": k, "tag": bs_node.name, "noscript": noscript}
                        if d not in entries:
                            entries.append(d)

            elif keyword in bs_node.name:
                # print("Found in tag name")
                noscript = close_noscript(bs_node)
                d = {"type": "tag", "value": bs_node.name, "noscript": noscript}
                if d not in entries:
                    entries.append(d)

            # recursively search injection points for the same variable
            for x in bs_node.contents:
                for entry in study(x, parent=bs_node, keyword=keyword):
                    if entry not in entries:
                        entries.append(entry)

        elif isinstance(bs_node, element.Comment):
            # print("Found in comment, tag {0}".format(parent.name))
            noscript = close_noscript(bs_node)
            d = {"type": "comment", "parent": parent.name, "noscript": noscript}
            if d not in entries:
                entries.append(d)

        elif isinstance(bs_node, element.NavigableString):
            # print("Found in text, tag {0}".format(parent.name))
            noscript = close_noscript(bs_node)
            d = {"type": "text", "parent": parent.name, "noscript": noscript}
            if d not in entries:
                entries.append(d)

    return entries


# generate a list of payloads based on where in the webpage the js-code will be injected 
Example #24
Source File: tbentries.py    From open-context-py with GNU General Public License v3.0
def remove_empty_node(self, node):
        """ gets rid of empty tags """
        if isinstance(node, Tag):
            keep_nodes = [
                'img',
                'td',
                'tr',
                'th',
            ]
            if node.name.lower() not in keep_nodes:
                remove_node = False
                no_child_remove_tags = [
                    'a',
                    'img',
                    'th',
                    'tr',
                    'td',
                    'strong',
                    'ul',
                    'ol',
                    'li',
                    'em',
                    'i',
                    'u',
                    'b',
                    'sup',
                    'sub',
                    'mark',
                    'q',
                    'samp',
                    'small'
                ]
                ok_child_nodes = node.find_all(no_child_remove_tags)
                if len(ok_child_nodes) < 1:
                    # no keep-worthy child tags; check for text content
                    all_string = ''
                    for act_string in node.stripped_strings:
                        all_string += str(act_string)
                    for d_child in node.descendants:
                        if isinstance(d_child, Tag):
                            for act_string in d_child.stripped_strings:
                                all_string += str(act_string)
                    all_string = all_string.strip()
                    # print('Check on: <' + node.name.lower() + '> with: ' + str(all_string))
                    if len(all_string) < 1:
                        remove_node = True
                    if isinstance(node.string, str):
                        n_string = node.string
                        n_string = n_string.strip()
                        if len(n_string) < 1:
                            remove_node = True
                        else:
                            remove_node = False
                if remove_node:
                    # print('Removing a: <' + node.name.lower() + '>')
                    node.extract() 
Example #25
Source File: scrape_bugblog.py    From Penny-Dreadful-Tools with GNU General Public License v3.0
def parse_knownbugs(b: Tag) -> None:
    # attempt to find all the fixed bugs
    all_codes = b.find_all(string=lambda text: isinstance(text, Comment))
    all_codes = [str(code).replace('\t', ' ') for code in all_codes]
    for issue in repo.get_repo().get_issues():
        # code = re.search(CODE_REGEX, issue.body, re.MULTILINE)
        bbt = re.search(BBT_REGEX, issue.body, re.MULTILINE)
        if bbt is None:
            cards = strings.get_cards_from_string(issue.title)
            if repo.is_issue_from_bug_blog(issue):
                find_bbt_in_body_or_comments(issue)
                find_bbt_in_issue_title(issue, b)
                bbt = re.search(BBT_REGEX, issue.body, re.MULTILINE)
                if bbt is None:
                    print('Issue #{id} {cards} has no Bug Blog text!'.format(id=issue.number, cards=cards))
                    issue.add_to_labels('Invalid Bug Blog')
                continue

            if not cards:
                continue
            lines = b.find_all(string=re.compile(r'\[' + cards[0] + r'\]'))
            if not lines:
                continue
            for line in lines:
                parent = line.parent
                bb_text = parent.get_text().strip()
                if find_issue_by_code(bb_text) is not None:
                    print('Already assigned.')
                    continue
                text = ''.join(parent.strings)
                print(text)
                repo.create_comment(issue, 'Found in bug blog.\nBug Blog Text: {0}'.format(text))
                if not repo.is_issue_from_bug_blog(issue):
                    issue.add_to_labels('From Bug Blog')
            continue
        if 'Invalid Bug Blog' in [i.name for i in issue.labels]:
            issue.remove_from_labels('Invalid Bug Blog')

        if repo.is_issue_from_bug_blog(issue):
            # Don't check for Bug Blog Text if it's not marked as a BB issue (Maybe because it was reopened)
            check_if_removed_from_bugblog(bbt, b, issue)

    check_for_missing_bugs(b) 
Example #26
Source File: extract.py    From dart-fss with MIT License
def seek_table(tables: List, includes: Pattern,
               excludes: Union[Pattern, None] = None) -> Tuple[Union[str, None], Union[str, None], Union[str, None]]:
    """ Table 검색 """
    regex = re.compile(r'\d{4}(.*?)\d{2}(.*?)\d{2}')
    for table in tables:
        for tag in table.previous_siblings:
            if tag in tables:
                break
            if isinstance(tag, Tag):
                children = tag.findChildren(text=includes)
                for child in children:
                    title = child
                    if title:
                        title = re.sub(r'\s+', '', title)
                        if excludes and excludes.search(title):
                            continue
                        if len(title) > 12:
                            continue
                        header = table.find_previous('table', class_='nb')
                        if header is None:
                            continue
                        tr_list = header.find_all('tr')
                        if len(tr_list) < 2:
                            continue

                        tr_cnt = 0
                        for tr in tr_list:
                            if regex.search(tr.text):
                                tr_cnt += 1

                        if tr_cnt == 0:
                            found = table.find_previous(text=re.compile(r'\d{4}(.*?)\d{2}(.*?)\d{2}'))
                            if found is None:
                                continue
                            header = found.parent
                            extract_text = re.sub('<.*?>', '\n', str(header))
                            extract_text = extract_text.split('\n')
                            html = '<table class="nb"><tbody>'

                            error = False
                            for t in extract_text:
                                if t.strip() == '':
                                    pass
                                else:
                                    if len(t) > 100:
                                        error = True
                                        break
                                    html += '<tr><td>' + t + '</td></tr>'
                            if error:
                                continue
                            html += '</tbody></table>'
                            header = BeautifulSoup(html, 'html.parser')
                        return title, header, table
    return None, None, None 
Example #27
Source File: Junos.py    From assimilator with MIT License
def get(self,args):
		logger.debug("class rules(JUNOS).get({0})".format(str(args)))
		if not self.dev.connected:
			logger.error("{0}: Firewall timed out or incorrect device credentials.".format(self.firewall_config['name']))
			return {'error' : 'Could not connect to device.'}, 504
		else:
			logger.info("{0}: Connected successfully.".format(self.firewall_config['name']))
		try:
			soup = BS(str(etree.tostring(self.dev.rpc.get_firewall_policies(), encoding='unicode')),'xml')
			logger.debug("soup: " + str(soup))
		except Exception as e:
			logger.error("Error parsing soup: {0}".format(str(e)))
			return {'error' : 'Error parsing soup.'}, 500
		finally:
			logger.debug("Closing device...")
			self.dev.close()
		entries = list()
		for context in soup.find("security-policies").children:
			if type(context) != Tag:
				continue
			elif context.name == "default-policy":
				continue
			else:
				logger.debug("context: {0}".format(str(context)))
			src_zone = context.find("context-information").find("source-zone-name").text
			dst_zone = context.find("context-information").find("destination-zone-name").text
			logger.debug("src_zone: {0}\ndst_zone: {1}\n".format(src_zone,dst_zone))
			for rule in context.children:
				logger.debug("Rule: {0}".format(str(rule)))
				if rule.name == "context-information" or type(rule) != Tag:
					continue
				aux = {
					"enabled" : rule.find('policy-state').text == 'enabled',
					"id" : int(rule.find('policy-identifier').text),
					"action" : rule.find('policy-information').find('policy-action').find('action-type').text,
					"destination" : list(),
					"from" : src_zone,
					# A <log/> element under <policy-action> indicates logging is enabled.
					"logging" : rule.find('policy-information').find('policy-action').find('log') is not None,
					"name" : rule.find('policy-information').find('policy-name').text,
					"application" : list(),
					"source" : list(),
					"to" : dst_zone
				}
				for addr in rule.find('source-addresses').children:
					if type(addr) != Tag:
						continue
					aux['source'].append(addr.find('address-name').text)
				for addr in rule.find('destination-addresses').children:
					if type(addr) != Tag:
						continue
					aux['destination'].append(addr.find('address-name').text)
				for addr in rule.find('applications').children:
					if type(addr) != Tag:
						continue
					aux['application'].append(addr.find('application-name').text)
				entries.append(aux)
		#entries = self.filter(args,entries)
		return {'len' : len(entries), 'rules' : entries} 
Example #28
Source File: mod16.py    From RHEAS with MIT License
def download(dbname, dts, bbox):
    """Downloads the MODIS evapotranspiration data product MOD16 for
    a set of dates *dt* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.01
    urlbase = "http://files.ntsg.umt.edu"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            url = "{0}/data/NTSG_Products/MOD16/MOD16A2.105_MERRAGMAO/Y{1}".format(urlbase, dt.year)
            resp_year = requests.get(url)
            try:
                assert resp_year.status_code == 200
                days = [link for link in BeautifulSoup(resp_year.text, 'html.parser', parse_only=SoupStrainer('a')) if isinstance(link, Tag) and link.text.find(dt.strftime("%j")) >= 0]
                assert len(days) > 0
                resp_day = requests.get("{0}{1}".format(urlbase, days[0].get('href')))
                assert resp_day.status_code == 200
                files = [link.get('href') for link in BeautifulSoup(resp_day.text, 'html.parser', parse_only=SoupStrainer('a')) if isinstance(link, Tag) and link.text.find("hdf") > 0]
                files = [f for f in files if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles)]
                outpath = tempfile.mkdtemp()
                for fname in files:
                    resp_file = requests.get("{0}{1}".format(urlbase, fname)) 
                    filename = fname.split("/")[-1]
                    with open("{0}/{1}".format(outpath, filename), 'wb') as fout:
                        for chunk in resp_file:
                            fout.write(chunk)
                    proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD16A2:ET_1km".format(
                        outpath, filename), "{0}/{1}".format(outpath, filename).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                tifs = glob.glob("{0}/*.tif".format(outpath))
                proc = subprocess.Popen(
                    ["gdal_merge.py", "-o", "{0}/et.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/et.tif".format(outpath), "--outfile={0}/et1.tif".format(
                    outpath), "--NoDataValue=-9999", "--calc=(A<32701)*(0.1*A+9999)-9999"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(
                    res), str(-res), "{0}/et1.tif".format(outpath), "{0}/et2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                if bbox is None:
                    pstr = []
                else:
                    pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])]
                proc = subprocess.Popen(["gdal_translate"] + pstr + ["-a_srs", "epsg:4326", "{0}/et2.tif".format(outpath), "{0}/et3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(
                    dbname, "{0}/et3.tif".format(outpath), dt, table, False)
                shutil.rmtree(outpath)
            except Exception:
                log.warning("MOD16 data not available for {0}. Skipping download!".format(
                    dt.strftime("%Y-%m-%d")))