Python fake_useragent.UserAgent() Examples

The following are 30 code examples of fake_useragent.UserAgent(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module fake_useragent, or try the search function.
Example #1
Source File: custom_driver.py    From king-bot with MIT License 8 votes vote down vote up
def headless(self, path: str, proxy: str = "") -> None:
    """Start a headless Chrome driver that presents a random User-Agent.

    Args:
        path: Forwarded as the first positional argument to
            ``webdriver.Chrome`` (presumably the chromedriver path -- confirm).
        proxy: Value for Chrome's ``proxy-server`` switch. An empty string
            leaves the proxy unconfigured.
    """
    random_agent = UserAgent().random
    chrome_options = webdriver.ChromeOptions()

    # Switches that are applied on every headless start, in this order.
    switches = (
        "headless",
        "window-size=1500,1200",
        "no-sandbox",
        "disable-dev-shm-usage",
        "disable-gpu",
        "log-level=3",
        f"user-agent={random_agent}",
    )
    for switch in switches:
        chrome_options.add_argument(switch)

    if proxy != "":
        self.proxy = True
        chrome_options.add_argument(f"proxy-server={proxy}")

    self.driver = webdriver.Chrome(path, chrome_options=chrome_options)
    self.set_config()
    self._headless = True
Example #2
Source File: utils.py    From web_develop with GNU General Public License v3.0 7 votes vote down vote up
def get_user_agent():
    """Return the ``random`` attribute of a freshly created ``UserAgent``."""
    return UserAgent().random
Example #3
Source File: google_scraper.py    From youtube-video-face-swap with MIT License 6 votes vote down vote up
def download_image(link, image_data, query):
    """Download ``link`` into ``data/raw/<query>/`` using a random User-Agent.

    The running counter ``download_image.delta`` (a function attribute that
    must be initialised by the caller before the first call) is incremented
    up front and used as the file name; it is rolled back when the download
    fails so failed attempts do not consume a number.

    Args:
        link: URL of the image to fetch.
        image_data: Unused by this function; kept for interface compatibility.
        query: Name of the sub-directory of ``data/raw/`` to save into.
    """
    import shutil  # local import: only needed for streaming the body to disk

    download_image.delta += 1
    # Use a random user agent header for bot id
    headers = {"User-Agent": UserAgent().random}

    try:
        # Derive the file extension from the last path segment of the link.
        file_name = link.split("/")[-1]
        extension = file_name.split(".")[-1]
        # Anything longer than three characters is cut down (this drops
        # trailing query strings such as "jpg?size=large"); as a consequence
        # every four-letter extension except "jpeg" falls back to "jpg".
        if len(extension) > 3:
            extension = extension[:3]
        if extension.lower() == "jpe":
            extension = "jpeg"
        if extension.lower() not in ["jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
            extension = "jpg"

        # Download the image
        print("[%] Downloading Image #{} from {}".format(download_image.delta, link))
        try:
            target = "data/raw/{}/".format(query) + "{}.{}".format(str(download_image.delta), extension)
            # urlretrieve() cannot send custom headers, so build an explicit
            # Request; otherwise the random User-Agent above is never used.
            request = urllib.request.Request(link, headers=headers)
            with urllib.request.urlopen(request) as response, open(target, "wb") as out_file:
                shutil.copyfileobj(response, out_file)
            print("[%] Downloaded File\n")
        except Exception as e:
            download_image.delta -= 1
            print("[!] Issue Downloading: {}\n[!] Error: {}".format(link, e))
    except Exception as e:
        download_image.delta -= 1
        print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))
Example #4
Source File: utils.py    From ProxyPool with Apache License 2.0 6 votes vote down vote up
def get_page(url, options=None):
    """Fetch ``url`` and return the response body text on HTTP 200.

    Args:
        url: Address to request.
        options: Optional mapping of extra headers; entries override the
            base headers built here.

    Returns:
        The response text when the status code is 200, otherwise ``None``
        (also ``None`` when a ``ConnectionError`` is raised).
    """
    # fake_useragent may fail to load its data; in that case simply send no
    # explicit User-Agent instead of crashing on an unbound variable.
    try:
        user_agent = UserAgent().random
    except FakeUserAgentError:
        user_agent = None

    base_headers = {}
    if user_agent is not None:
        base_headers['User-Agent'] = user_agent
    base_headers['Accept-Encoding'] = 'gzip, deflate, sdch'
    base_headers['Accept-Language'] = 'zh-CN,zh;q=0.8'

    headers = dict(base_headers, **(options or {}))
    print('Getting', url)
    try:
        r = requests.get(url, headers=headers)
        print('Getting result', url, r.status_code)
        if r.status_code == 200:
            return r.text
    except ConnectionError:
        print('Crawling Failed', url)
    return None
Example #5
Source File: __main__.py    From mosmetro-python with GNU General Public License v3.0 6 votes vote down vote up
def main(args=None):
    """Entry point: find a provider for a new session and try to connect.

    Prints the current time, optionally sets a random User-Agent on the
    session, then exits the process: with the provider value itself when
    ``Provider.find`` returns a bool, with 0 after a successful
    ``connect()`` and with 1 otherwise. ``args`` is not used.
    """
    print(datetime.now())

    with requests.Session() as session:
        if not UserAgent:
            # UserAgent is falsy here, presumably because the optional
            # import of fake-useragent failed -- confirm at the import site.
            print("Random User-Agent disabled. Please install 'fake-useragent'.")
        else:
            agent_source = UserAgent(path=ROOT + "/res/user-agent.json")
            session.headers['User-Agent'] = agent_source.random

        provider = Provider.find(session)

        # A plain bool means there is no provider object to connect with.
        if isinstance(provider, bool):
            sys.exit(provider)

        if not provider.connect():
            print("Connection failed :(")
            sys.exit(1)

        print("Connected successfully! :3")
        sys.exit(0)
Example #6
Source File: __init__.py    From aiodl with MIT License 6 votes vote down vote up
async def download(url, output=None, num_tasks=16, max_tries=10,
                   fake_user_agent=False, quiet=False, *, loop=None):
    """Download ``url`` through a ``Download`` object and always close it.

    This must be a coroutine function because the body awaits
    ``Download.download()`` and ``Download.close()``.

    Args:
        url: Address to download.
        output: Passed to ``Download`` as ``output_fname``.
        num_tasks: Passed through to ``Download``.
        max_tries: Passed through to ``Download``.
        fake_user_agent: When true, a random User-Agent from
            ``fake_useragent`` is used; otherwise ``None`` is passed.
        quiet: Passed through to ``Download``.
        loop: Event loop to use; defaults to ``asyncio.get_event_loop()``.

    Returns:
        Whatever ``Download.download()`` returns.
    """
    if loop is None:
        loop = asyncio.get_event_loop()

    user_agent = UserAgent().random if fake_user_agent else None

    d = Download(
        url=url,
        output_fname=output,
        num_tasks=num_tasks,
        max_tries=max_tries,
        user_agent=user_agent,
        quiet=quiet,
        loop=loop
    )
    try:
        return await d.download()
    finally:
        # Release the downloader even when the download raises.
        await d.close()
Example #7
Source File: aqi.py    From yui with GNU Affero General Public License v3.0 6 votes vote down vote up
async def get_aqi_result(idx: str) -> Optional[AQIRecord]:
    """Fetch the observation feed for station ``idx`` and build an AQIRecord.

    This must be a coroutine function because the body uses ``async with``
    and ``await``.

    Args:
        idx: Station identifier interpolated into the feed URL.

    Returns:
        An ``AQIRecord`` built from the first observation, or ``None`` when
        that observation's status is not ``'ok'``.
    """
    url = f'https://api.waqi.info/api/feed/@{idx}/obs.en.json'
    headers = {
        'User-Agent': UserAgent().chrome,
        'accept-language': 'ko',
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as res:
            d2 = await res.json(loads=json.loads)

    if d2['rxs']['obs'][0]['status'] != 'ok':
        return None

    data = d2['rxs']['obs'][0]['msg']

    return AQIRecord(
        name=data['i18n']['name']['ko'],
        aqi=data['aqi'],
        time=data['time']['utc']['v'],
        # Only the pollutants listed below are forwarded as keyword fields.
        **{
            x['p']: Field(*x['v'])
            for x in data['iaqi']
            if x['p'] in ['pm25', 'pm10', 'o3', 'no2', 'so2', 'co']
        },
    )
Example #8
Source File: Village-Spider-Test.py    From NBSPRC-spider with Apache License 2.0 6 votes vote down vote up
def getUrl(url, num_retries=5):
    """Return the text of ``url``, retrying on any exception.

    Every attempt uses a fresh random User-Agent. After a failure the
    function sleeps 10 seconds and tries again, up to ``num_retries`` extra
    attempts; once they are used up it prints the error and returns ``None``.
    """
    retries_left = num_retries
    while True:
        headers = {'User-Agent': UserAgent().random}
        try:
            response = requests.get(url, headers=headers)
            response.encoding = response.apparent_encoding
            return response.text
        except Exception as e:
            if not retries_left > 0:
                print("retry fail!")
                print("error: %s" % e + " " + url)
                # Returns None; callers that expect text will fail on it.
                return
            time.sleep(10)
            print(url)
            print("requests fail, retry!")
            retries_left = retries_left - 1
Example #9
Source File: WJX_Autosubmit.py    From WJX_Autosubmit with GNU General Public License v3.0 5 votes vote down vote up
def Get_POOLS():
    """Scrape the proxy listing page and return every fifth ``<td>`` value.

    Returns:
        A list holding every fifth ``<td>`` cell content among the first 499
        matches (presumably the IP-address column -- verify against the page).
    """
    response = requests.get(
        url='https://www.xicidaili.com/nn/',
        headers={'User-Agent': UserAgent().random},
    )
    # Notes taken from the official ``re`` documentation:
    # - in string literals with an 'r' prefix backslashes need no special
    #   handling;
    # - (...) is a group: it matches the regular expression inside the
    #   parentheses and marks the start and end of the group.
    cells = re.findall(r'<td>(.+?)</td>', response.text)
    return cells[:499:5]

# Pick an address from the proxy pool to build headers that spoof the submitting IP when posting data
Example #10
Source File: http_client.py    From proxy_py with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self):
    """Set per-client defaults and lazily create the shared connector.

    The connector is stored on ``HttpClient`` itself and only built when no
    connector has been stored there yet.
    """
    self.user_agent = UserAgent().random
    self.timeout = 60
    self.proxy_address = None

    if HttpClient._aiohttp_connector is not None:
        return

    simultaneous = settings.NUMBER_OF_SIMULTANEOUS_REQUESTS
    simultaneous_per_host = settings.NUMBER_OF_SIMULTANEOUS_REQUESTS_PER_HOST
    HttpClient._aiohttp_connector = ProxyConnector(
        remote_resolve=True,
        limit=simultaneous,
        limit_per_host=simultaneous_per_host,
    )
Example #11
Source File: WJX_Autosubmit.py    From WJX_Autosubmit with GNU General Public License v3.0 5 votes vote down vote up
def Get_Headers(pools):
    """Build request headers with a random User-Agent and a random pool entry.

    Args:
        pools: Non-empty sequence to draw the ``X-Forwarded-For`` value from.

    Returns:
        A dict with the ``User-Agent`` and ``X-Forwarded-For`` keys.
    """
    forwarded_for = random.choice(pools)
    return {
        'User-Agent': UserAgent().random,
        'X-Forwarded-For': forwarded_for,
    }
Example #12
Source File: WJX_Autosubmit.py    From WJX_Autosubmit with GNU General Public License v3.0 5 votes vote down vote up
def get_fill_content(url):
    """GET ``url`` with a random user-agent.

    Returns:
        A ``(text, cookies)`` tuple taken from the response.
    """
    response = requests.get(url, headers={'user-agent': UserAgent().random})
    cookies = response.cookies
    return response.text, cookies

# Extract curid, rn, jqnonce and starttime from the page, and build ktimes, for submitting the questionnaire
Example #13
Source File: 19.py    From WJX_Autosubmit with GNU General Public License v3.0 5 votes vote down vote up
def Get_Headers():
    """Build the header dict used when submitting to www.wjx.cn.

    The ``Referer`` and ``Cookie`` values are placeholders that have to be
    replaced with the questionnaire link and a captured cookie.
    """
    # Random User-Agent, provided by UserAgent from the fake_useragent package.
    random_agent = UserAgent().random
    # Get_IP() supplies the proxy IP address used as X-Forwarded-For.
    proxy_ip = Get_IP()
    return {
        'Host': 'www.wjx.cn',
        'User-Agent': random_agent,
        # Submit the data as a URL-encoded form.
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # Link of your questionnaire.
        'Referer': 'https://www.wjx.cn/m/XXXXX.aspx',
        # Value captured from real traffic.
        'Cookie': 'XXXXX',
        'X-Forwarded-For': proxy_ip,
    }
Example #14
Source File: 19.py    From WJX_Autosubmit with GNU General Public License v3.0 5 votes vote down vote up
def Get_IP():
    """Return one random entry scraped from the xicidaili proxy listing.

    Returns:
        A randomly chosen element among every fifth ``<td>`` value of the
        first 499 matches on the page.
    """
    # Minimal request header, just enough to access the proxy site.
    request = urllib.request.Request(
        url='https://www.xicidaili.com/nn/',
        headers={'User-Agent': UserAgent().random},
    )
    page = urllib.request.urlopen(request).read().decode('utf-8')

    # Per the original author's inspection of the page, every value sits in
    # a <td> tag, ordered as IP address, port, protocol, location, time.
    cell_pattern = re.compile(r'<td>(.+?)</td>')
    # The full match list is not the final result: keep every fifth item
    # (the IP addresses, according to the original author) as the pool.
    address_pool = cell_pattern.findall(page)[0:499:5]
    # Pick one address from the pool at random.
    return random.choice(address_pool)
Example #15
Source File: ClassCongregation.py    From Medusa with GNU General Public License v3.0 5 votes vote down vote up
def result(self, Values: str) -> str:
    """Resolve ``Values`` into a User-Agent header string.

    Args:
        Values: ``None`` for a random agent, a browser keyword (matched
            case-insensitively: firefox, ie, msie, opera, chrome,
            AppleWebKit, Gecko, safari), or a literal User-Agent string.
            Anything longer than 11 characters is treated as a literal
            header and returned unchanged.

    Returns:
        The selected User-Agent string. A random agent is returned for
        unknown keywords, and a fixed fallback string when anything raises.
    """
    try:
        self.Values = Values
        # Check for None first: len(None) would raise and divert a missing
        # value into the error fallback instead of the random agent.
        if Values is None:
            return UserAgent(verify_ssl=False).random
        if len(Values) > 11:
            return Values
        ua = UserAgent(verify_ssl=False)
        # Keys are lower-case because the lookup value is lower-cased; the
        # mapped value is the UserAgent attribute to read.
        attribute_by_keyword = {
            "firefox": "firefox",
            "ie": "ie",
            "msie": "msie",
            "opera": "opera",
            "chrome": "chrome",
            "applewebkit": "google",
            "gecko": "ff",
            "safari": "safari",
        }
        # Unrecognised input falls back to a random agent.
        return getattr(ua, attribute_by_keyword.get(Values.lower(), "random"))
    except Exception as e:
        ErrorLog().Write("ClassCongregation_AgentHeader(class)_result(def)", e)
        # On error, return a fixed User-Agent string.
        return "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36"
Example #16
Source File: webservicescanner.py    From WebServiceScanner with Apache License 2.0 5 votes vote down vote up
def get_http_banner(self, url):
    """Return the stripped ``<title>`` text of ``url``, or ``None`` on failure.

    The lookup is best-effort: any exception (request failure, missing
    title, parser error) results in ``None``.
    """
    try:
        # The header name must be "User-Agent"; the previous "UserAgent" key
        # was sent as an unrelated header, so the random agent had no effect.
        r = requests.get(url, headers={'User-Agent': UserAgent().random},
                         timeout=2, verify=False, allow_redirects=True)
        soup = BeautifulSoup(r.content, 'lxml')
        return soup.title.text.strip('\n').strip()
    except Exception:
        # Deliberately swallowed: a banner is optional information.
        return None
Example #17
Source File: middlewares.py    From Douban_Crawler with GNU General Public License v3.0 5 votes vote down vote up
def process_request(self,request,spider):
    """Log a random User-Agent and set it on the request if none is present.

    ``setdefault`` is used, so an already present ``User-Agent`` header is
    left untouched. ``spider`` is not used.
    """
    ua = UserAgent().random
    if not ua:
        return
    log.msg('Current UserAgent: '+ua, level=log.INFO)
    request.headers.setdefault('User-Agent', ua)
Example #18
Source File: _bing.py    From image_search with MIT License 5 votes vote down vote up
def save_image(link, file_path):
    """Stream the image at ``link`` into ``file_path``.

    Raises:
        Exception: When the response status code is not 200.
    """
    # Use a random user agent header for bot id
    headers = {"User-Agent": UserAgent(verify_ssl=False).random}
    response = requests.get(link, stream=True, headers=headers)

    if response.status_code != 200:
        raise Exception("Image returned a {} error.".format(response.status_code))

    with open(file_path, 'wb') as out_file:
        # Have the raw stream decode any content encoding while copying.
        response.raw.decode_content = True
        shutil.copyfileobj(response.raw, out_file)
Example #19
Source File: _google.py    From image_search with MIT License 5 votes vote down vote up
def save_image(link, file_path):
    """Stream the image at ``link`` into ``file_path``.

    Raises:
        Exception: When the response status code is not 200.
    """
    # Use a random user agent header for bot id
    agent = UserAgent(verify_ssl=False)
    reply = requests.get(link, stream=True, headers={"User-Agent": agent.random})

    status = reply.status_code
    if status != 200:
        raise Exception("Image returned a {} error.".format(reply.status_code))

    with open(file_path, 'wb') as target:
        # Have the raw stream decode any content encoding while copying.
        reply.raw.decode_content = True
        shutil.copyfileobj(reply.raw, target)
Example #20
Source File: middlewares.py    From Douban_Crawler with GNU General Public License v3.0 5 votes vote down vote up
def process_request(self,request, spider):
    """Print a random User-Agent and set it on the request if none is present.

    ``setdefault`` is used, so an already present ``User-Agent`` header is
    left untouched. ``spider`` is not used.
    """
    user_agent = UserAgent()
    ua = user_agent.random
    if ua:
        # Call form of print: same output on Python 2 (single parenthesised
        # string) and valid syntax on Python 3.
        print("********Current UserAgent:%s************" % ua)
        request.headers.setdefault('User-Agent', ua)
Example #21
Source File: isp_data_pollution.py    From isp-data-pollution with MIT License 5 votes vote down vote up
def get_useragents(self):
    """Create ``self.fake_ua``, trying up to five times before giving up.

    A ``FakeUserAgentError`` or ``URLError`` counts as a failed attempt
    (reported only when ``self.debug`` is set). After five failures the
    process exits with status 1.
    """
    attempt = 0
    while attempt < 5:
        try:
            agent = fake_ua.UserAgent()
        except (fake_ua.errors.FakeUserAgentError,urllib.error.URLError) as e:
            if self.debug: print(f'.UserAgent exception #{attempt}:\n{e}')
            attempt += 1
            continue
        self.fake_ua = agent
        return

    print('Too many .UserAgent failures. Exiting.')
    sys.exit(1)
Example #22
Source File: weiboEmoji.py    From DecryptLogin with MIT License 5 votes vote down vote up
def __init__(self, username, password, **kwargs):
    """Store default headers, the working directory and a logged-in session.

    ``kwargs`` is accepted but not used here.
    """
    default_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
    self.headers = {'user-agent': default_agent}
    self.cur_path = os.getcwd()
    self.ua = UserAgent()
    self.session = weiboEmoji.login(username, password)
Example #23
Source File: helpers.py    From NBAsh with MIT License 5 votes vote down vote up
def get_header():
    """Return a random User-Agent using ``agent.json`` in the working directory."""
    cache_file = f"{os.getcwd()}/agent.json"
    return fake_useragent.UserAgent(path=cache_file).random
Example #24
Source File: request.py    From dart-fss with MIT License 5 votes vote down vote up
def get_user_agent():
    """ Return a Chrome user-agent

    Returns
    -------
    str
        ``UserAgent().chrome`` converted with ``str``
    """
    return str(UserAgent().chrome)
Example #25
Source File: request.py    From dart-fss with MIT License 5 votes vote down vote up
def update_user_agent(self, force: bool = False):
    """ Replace the session's ``user-agent`` header

    Parameters
    ----------
    force: bool
        When true a new ``UserAgent`` is created here; otherwise the value
        comes from ``get_user_agent()``
    """
    user_agent = str(UserAgent().chrome) if force else get_user_agent()
    self.s.headers.update({'user-agent': user_agent})
Example #26
Source File: middlewares.py    From JobSpiders with Apache License 2.0 5 votes vote down vote up
def __init__(self, crawler):
    """Create the UserAgent source and read the configured agent type.

    Args:
        crawler: Object whose ``settings.get`` supplies ``RANDOM_UA_TYPE``.
    """
    super(RandomUserAgentMiddleware, self).__init__()

    self.ua = UserAgent()
    # Read RANDOM_UA_TYPE from the settings, defaulting to 'random'.
    crawler_settings = crawler.settings
    self.ua_type = crawler_settings.get('RANDOM_UA_TYPE', 'random')
Example #27
Source File: requests_html.py    From requests-html with MIT License 5 votes vote down vote up
def user_agent(style=None) -> _UserAgent:
    """Return a User-Agent of the requested ``style``.

    Without a style the module-level ``DEFAULT_USER_AGENT`` is returned.
    With a style, the shared ``useragent`` object is created on first use
    and indexed by ``style``.
    """
    global useragent

    if not style:
        return DEFAULT_USER_AGENT

    if not useragent:
        useragent = UserAgent()
    return useragent[style]
Example #28
Source File: module_resolvers.py    From simplydomain with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self):
    """
    Initialise the requests helper base, a UserAgent source and the
    (initially empty) nameserver lists.
    """
    module_helpers.RequestsHelpers.__init__(self)
    self.ua = UserAgent()
    self.nameservers, self.nameserver_ips = [], []
Example #29
Source File: module_helpers.py    From simplydomain with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self):
    """
    Initialise the core output base and create a UserAgent source.
    """
    # TODO: Init Logging class
    core_output.CoreOutput.__init__(self)
    agent_source = UserAgent()
    self.ua = agent_source
Example #30
Source File: request_handler.py    From Raccoon with MIT License 5 votes vote down vote up
def _set_headers():
    """Return the default requests headers with a random User-Agent set."""
    default_headers = requests_utils.default_headers()
    random_agent = UserAgent(verify_ssl=False).random
    default_headers["User-Agent"] = random_agent
    return default_headers