Python Examples of fake_useragent.UserAgent

Source File: custom_driver.py From king-bot with MIT License

8 votes

def headless(self, path: str, proxy: str = "") -> None:
        """Create a headless Chrome driver that reports a random User-Agent.

        Args:
            path: Forwarded as the first positional argument to
                ``webdriver.Chrome``.
            proxy: Proxy server address. When it is not the empty string,
                ``self.proxy`` is set to True and the address is passed to
                Chrome through the ``proxy-server`` argument.

        Side effects:
            Stores the driver on ``self.driver``, calls ``self.set_config()``
            and sets ``self._headless`` to True.
        """
        userAgent = UserAgent().random
        chrome_opts = webdriver.ChromeOptions()

        # Arguments are added in the same order as a hand-written sequence.
        startup_flags = (
            "headless",
            "window-size=1500,1200",
            "no-sandbox",
            "disable-dev-shm-usage",
            "disable-gpu",
            "log-level=3",
            f"user-agent={userAgent}",
        )
        for flag in startup_flags:
            chrome_opts.add_argument(flag)

        if proxy != "":
            self.proxy = True
            chrome_opts.add_argument("proxy-server={}".format(proxy))

        self.driver = webdriver.Chrome(path, chrome_options=chrome_opts)
        self.set_config()
        self._headless = True

Source File: utils.py From web_develop with GNU General Public License v3.0

7 votes

def get_user_agent():
    """Return a random User-Agent string from a fresh ``UserAgent`` object."""
    return UserAgent().random

Source File: google_scraper.py From youtube-video-face-swap with MIT License

6 votes

def download_image(link, image_data, query):
    """Download one image into ``data/raw/<query>/`` with a random User-Agent.

    A running counter is kept on the function itself as
    ``download_image.delta``; it must exist before the first call. The
    counter names the output file and is rolled back when the download fails.

    Args:
        link: URL of the image to fetch.
        image_data: Not used by this function.
        query: Name of the sub-directory under ``data/raw/`` to save into.
    """
    download_image.delta += 1
    # Use a random user agent header for bot id
    headers = {"User-Agent": UserAgent().random}

    try:
        # Derive the file extension from the last path segment of the link.
        file_name = link.split("/")[-1]
        extension = file_name.split(".")[-1]
        # Anything longer than three characters is truncated
        # ("jpeg" becomes "jpe", which is restored just below).
        extension = extension[:3]
        if extension.lower() == "jpe":
            extension = "jpeg"
        if extension.lower() not in ["jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
            extension = "jpg"

        # Download the image
        print("[%] Downloading Image #{} from {}".format(download_image.delta, link))
        try:
            target = "data/raw/{}/".format(query) + "{}.{}".format(str(download_image.delta), extension)
            # Build an explicit Request so the random User-Agent is actually
            # sent; urlretrieve() has no way to pass custom headers.
            request = urllib.request.Request(link, headers=headers)
            with urllib.request.urlopen(request) as response, open(target, "wb") as out_file:
                out_file.write(response.read())
            print("[%] Downloaded File\n")
        except Exception as e:
            download_image.delta -= 1
            print("[!] Issue Downloading: {}\n[!] Error: {}".format(link, e))
    except Exception as e:
        download_image.delta -= 1
        print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))

Source File: utils.py From ProxyPool with Apache License 2.0

6 votes

def get_page(url, options=None):
    """Fetch ``url`` with browser-like headers and return the response text.

    Args:
        url: Address to request.
        options: Optional mapping of extra headers; its entries override the
            defaults built here.

    Returns:
        The response text when the status code is 200. ``None`` for any
        other status code or when a ``ConnectionError`` is caught.
    """
    base_headers = {}
    try:
        base_headers['User-Agent'] = UserAgent().random
    except FakeUserAgentError:
        # No random agent available: leave the header out instead of failing
        # later on an unbound variable.
        pass
    base_headers['Accept-Encoding'] = 'gzip, deflate, sdch'
    base_headers['Accept-Language'] = 'zh-CN,zh;q=0.8'

    headers = dict(base_headers, **(options or {}))
    print('Getting', url)
    try:
        r = requests.get(url, headers=headers)
        print('Getting result', url, r.status_code)
        if r.status_code == 200:
            return r.text
    except ConnectionError:
        print('Crawling Failed', url)
    return None

Source File: __main__.py From mosmetro-python with GNU General Public License v3.0

6 votes

def main(args=None):
    """Find a provider for a fresh session, try to connect, then exit.

    Prints the current time, sets a random User-Agent on the session when
    ``UserAgent`` is truthy, and asks ``Provider.find`` for a provider. A
    boolean result is handed straight to ``sys.exit``; otherwise the process
    exits with 0 when ``connect()`` is truthy and with 1 when it is not.

    Args:
        args: Not used by this function.
    """
    print(datetime.now())

    with requests.Session() as session:
        if not UserAgent:
            print("Random User-Agent disabled. Please install 'fake-useragent'.")
        else:
            agent_source = UserAgent(path=ROOT + "/res/user-agent.json")
            session.headers['User-Agent'] = agent_source.random

        provider = Provider.find(session)

        # find() may answer with a plain bool instead of a provider object.
        if isinstance(provider, bool):
            sys.exit(provider)

        if not provider.connect():
            print("Connection failed :(")
            sys.exit(1)

        print("Connected successfully! :3")
        sys.exit(0)

Source File: __init__.py From aiodl with MIT License

6 votes

async def download(url, output=None, num_tasks=16, max_tries=10,
                   fake_user_agent=False, quiet=False, *, loop=None):
    """Run a ``Download`` for ``url`` and always close it afterwards.

    Declared ``async`` because the body awaits ``d.download()`` and
    ``d.close()``; ``await`` is a syntax error inside a plain ``def``.

    Args:
        url: Passed to ``Download`` as ``url``.
        output: Passed to ``Download`` as ``output_fname``.
        num_tasks: Passed to ``Download`` as ``num_tasks``.
        max_tries: Passed to ``Download`` as ``max_tries``.
        fake_user_agent: When truthy a random User-Agent is generated,
            otherwise ``None`` is passed as ``user_agent``.
        quiet: Passed to ``Download`` as ``quiet``.
        loop: Event loop to use; defaults to ``asyncio.get_event_loop()``.

    Returns:
        Whatever ``Download.download()`` returns.
    """
    if loop is None:
        loop = asyncio.get_event_loop()
    user_agent = UserAgent().random if fake_user_agent else None
    d = Download(
        url=url,
        output_fname=output,
        num_tasks=num_tasks,
        max_tries=max_tries,
        user_agent=user_agent,
        quiet=quiet,
        loop=loop
    )
    try:
        return await d.download()
    finally:
        # Close the downloader whether the download succeeded or raised.
        await d.close()

Source File: aqi.py From yui with GNU Affero General Public License v3.0

6 votes

async def get_aqi_result(idx: str) -> Optional[AQIRecord]:
    """Fetch the observation feed for station ``idx`` and build an AQIRecord.

    Declared ``async`` because the body uses ``async with`` and ``await``,
    which are syntax errors inside a plain ``def``.

    Args:
        idx: Station identifier interpolated into the feed URL.

    Returns:
        An ``AQIRecord`` built from the first observation, or ``None`` when
        that observation's ``status`` is not ``'ok'``.
    """
    url = f'https://api.waqi.info/api/feed/@{idx}/obs.en.json'
    headers = {
        'User-Agent': UserAgent().chrome,
        'accept-language': 'ko',
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as res:
            d2 = await res.json(loads=json.loads)

    first_obs = d2['rxs']['obs'][0]
    if first_obs['status'] != 'ok':
        return None

    data = first_obs['msg']

    # Only the listed pollutant codes become keyword fields of the record.
    wanted = ['pm25', 'pm10', 'o3', 'no2', 'so2', 'co']
    return AQIRecord(
        name=data['i18n']['name']['ko'],
        aqi=data['aqi'],
        time=data['time']['utc']['v'],
        **{
            x['p']: Field(*x['v'])
            for x in data['iaqi']
            if x['p'] in wanted
        },
    )

Source File: Village-Spider-Test.py From NBSPRC-spider with Apache License 2.0

6 votes

def getUrl(url,num_retries = 5):
    """Fetch ``url`` and return its text, retrying after failures.

    Each attempt uses a new random User-Agent. After a failed attempt the
    function sleeps 10 seconds and tries again while retries remain.

    Args:
        url: Address to request.
        num_retries: Number of retries allowed after the first attempt.

    Returns:
        The response text, or ``None`` once the retries are exhausted.
    """
    remaining = num_retries
    while True:
        request_headers = {'User-Agent': UserAgent().random}
        try:
            response = requests.get(url, headers=request_headers)
            # Decode with the encoding detected from the content.
            response.encoding = response.apparent_encoding
            return response.text
        except Exception as e:
            if remaining <= 0:
                print("retry fail!")
                print("error: %s" % e + " " + url)
                # Returns None; a caller that expects text will then fail.
                return
            time.sleep(10)
            print(url)
            print("requests fail, retry!")
            remaining -= 1

Source File: WJX_Autosubmit.py From WJX_Autosubmit with GNU General Public License v3.0

5 votes

def Get_POOLS():
	"""Scrape the proxy listing page and return a list of table-cell values.

	Returns:
		Every fifth ``<td>`` text, starting with the first, taken from the
		first 499 matches on the page (presumably the IP-address column;
		verify against the page layout).
	"""
	request_headers = {'User-Agent': UserAgent().random}
	page = requests.get(url='https://www.xicidaili.com/nn/', headers=request_headers)
	# Raw-string pattern: the group captures the text between <td> and </td>.
	cells = re.findall(r'<td>(.+?)</td>', page.text)
	return cells[0:499:5]

# Pick an address from the proxy pool and use it to build the request headers, spoofing the submitter's IP

Source File: http_client.py From proxy_py with GNU General Public License v3.0

5 votes

def __init__(self):
        """Set per-client defaults and create the shared connector if needed.

        The connector is stored on the ``HttpClient`` class and is only
        built when ``HttpClient._aiohttp_connector`` is still ``None``.
        """
        self.user_agent = UserAgent().random
        self.timeout = 60
        self.proxy_address = None

        if HttpClient._aiohttp_connector is not None:
            return

        # Limits are read from the project settings module.
        HttpClient._aiohttp_connector = ProxyConnector(
            remote_resolve=True,
            limit=settings.NUMBER_OF_SIMULTANEOUS_REQUESTS,
            limit_per_host=settings.NUMBER_OF_SIMULTANEOUS_REQUESTS_PER_HOST,
        )

Source File: WJX_Autosubmit.py From WJX_Autosubmit with GNU General Public License v3.0

5 votes

def Get_Headers(pools):
	"""Build request headers with a random User-Agent and forwarded address.

	Args:
		pools: Non-empty sequence to pick the ``X-Forwarded-For`` value from.

	Returns:
		A dict with ``User-Agent`` and ``X-Forwarded-For`` entries.
	"""
	forwarded_for = random.choice(pools)
	return {
		'User-Agent': UserAgent().random,
		'X-Forwarded-For': forwarded_for,
	}

Source File: WJX_Autosubmit.py From WJX_Autosubmit with GNU General Public License v3.0

5 votes

def get_fill_content(url):
	"""GET ``url`` with a random User-Agent.

	Args:
		url: Address to request.

	Returns:
		A ``(text, cookies)`` tuple taken from the response.
	"""
	response = requests.get(url, headers={'user-agent': UserAgent().random})
	return response.text, response.cookies

# Extract curid, rn, jqnonce and starttime from the page, and build ktimes for submitting the questionnaire

Source File: 19.py From WJX_Autosubmit with GNU General Public License v3.0

5 votes

def Get_Headers():
    """Build the header dict used when posting to www.wjx.cn.

    The ``Referer`` and ``Cookie`` values are placeholders to be filled in
    by the user (questionnaire link and captured cookie).

    Returns:
        A dict of request headers.
    """
    # Random User-Agent from the fake_useragent library.
    random_agent = UserAgent().random
    # Address obtained from the proxy listing via Get_IP().
    forwarded_ip = Get_IP()
    return {
        'Host': 'www.wjx.cn',
        'User-Agent': random_agent,
        # Submit the data as a URL-encoded form.
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Referer': 'https://www.wjx.cn/m/XXXXX.aspx',
        'Cookie': 'XXXXX',
        'X-Forwarded-For': forwarded_ip,
    }

Source File: 19.py From WJX_Autosubmit with GNU General Public License v3.0

5 votes

def Get_IP():
    """Scrape the proxy listing page and return one randomly chosen cell.

    Returns:
        A random element from every fifth ``<td>`` text (starting with the
        first) among the first 499 matches; per the original author's notes
        these cells hold the IP addresses.
    """
    # Minimal request headers for visiting the proxy listing site.
    request = urllib.request.Request(
        url='https://www.xicidaili.com/nn/',
        headers={'User-Agent': UserAgent().random},
    )
    page = urllib.request.urlopen(request).read().decode('utf-8')
    # Every value on the page sits inside a <td> element.
    cell_pattern = re.compile(r'<td>(.+?)</td>')
    # Keep one cell out of every five to form the address pool.
    pools = cell_pattern.findall(page)[0:499:5]
    return random.choice(pools)

Source File: ClassCongregation.py From Medusa with GNU General Public License v3.0

5 votes

def result(self, Values: str) -> str:  # Return a User-Agent string for the given value
        """Turn a browser keyword (or a literal header) into a User-Agent.

        Args:
            Values: ``None`` for a random agent, a browser keyword
                (``firefox``, ``ie``, ``msie``, ``opera``, ``chrome``,
                ``AppleWebKit``, ``Gecko``, ``safari``; case-insensitive),
                or a string longer than 11 characters, which is treated as
                a ready-made User-Agent and returned unchanged.

        Returns:
            The chosen User-Agent string. Unknown keywords give a random
            agent; any exception is logged and a fixed fallback string is
            returned.
        """
        try:
            self.Values = Values
            # Check None first: len(None) would raise and skip this branch.
            if Values is None:
                return UserAgent(verify_ssl=False).random
            if len(Values) > 11:
                return Values
            ua = UserAgent(verify_ssl=False)
            # Keys are lower-case because the input is lower-cased before
            # lookup; values are the UserAgent attributes to read.
            attribute_for = {
                "firefox": "firefox",
                "ie": "ie",
                "msie": "msie",
                "opera": "opera",
                "chrome": "chrome",
                "applewebkit": "google",
                "gecko": "ff",
                "safari": "safari",
            }
            # Unrecognised input falls back to a random agent.
            return getattr(ua, attribute_for.get(Values.lower(), "random"))
        except Exception as e:
            ErrorLog().Write("ClassCongregation_AgentHeader(class)_result(def)", e)
            # On error fall back to a fixed User-Agent.
            return "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36"

Source File: webservicescanner.py From WebServiceScanner with Apache License 2.0

5 votes

def get_http_banner(self, url):
        """Return the stripped ``<title>`` text of ``url``, or None on failure.

        The request uses a random User-Agent, a 2 second timeout, no
        certificate verification, and follows redirects.

        Args:
            url: Address to probe.

        Returns:
            The page title with surrounding newlines and whitespace removed,
            or ``None`` when anything goes wrong (request error, no title).
        """
        try:
            # The header name must be 'User-Agent'; 'UserAgent' would be
            # sent as an unrelated custom header.
            r = requests.get(url, headers={'User-Agent': UserAgent().random},
                             timeout=2, verify=False, allow_redirects=True)
            soup = BeautifulSoup(r.content, 'lxml')
            return soup.title.text.strip('\n').strip()
        except Exception:
            # Best-effort probe: a failure simply means "no banner".
            return None

Source File: middlewares.py From Douban_Crawler with GNU General Public License v3.0

5 votes

def process_request(self,request,spider):
        """Log a random User-Agent and set it on the request if none is set.

        Args:
            request: Request whose ``headers`` receive the ``User-Agent``
                default.
            spider: Not used by this method.
        """
        ua = UserAgent().random
        if not ua:
            return
        log.msg('Current UserAgent: '+ua, level=log.INFO)
        # setdefault keeps a User-Agent that is already present.
        request.headers.setdefault('User-Agent', ua)

Source File: _bing.py From image_search with MIT License

5 votes

def save_image(link, file_path):
    """Stream the image at ``link`` into ``file_path``.

    Args:
        link: URL of the image.
        file_path: Destination path, opened in binary write mode.

    Raises:
        Exception: When the response status code is not 200.
    """
    # Use a random user agent header for bot id
    agent = UserAgent(verify_ssl=False).random
    response = requests.get(link, stream=True, headers={"User-Agent": agent})
    if response.status_code != 200:
        raise Exception("Image returned a {} error.".format(response.status_code))

    with open(file_path, 'wb') as out_file:
        # Ask the raw stream to decode the content while it is read.
        response.raw.decode_content = True
        shutil.copyfileobj(response.raw, out_file)

Source File: _google.py From image_search with MIT License

5 votes

def save_image(link, file_path):
    """Download ``link`` with a random User-Agent and write it to disk.

    Args:
        link: URL of the image.
        file_path: Destination path, opened in binary write mode.

    Raises:
        Exception: When the response status code is not 200.
    """
    # Use a random user agent header for bot id
    request_headers = {"User-Agent": UserAgent(verify_ssl=False).random}
    reply = requests.get(link, stream=True, headers=request_headers)
    succeeded = reply.status_code == 200
    if not succeeded:
        raise Exception("Image returned a {} error.".format(reply.status_code))

    with open(file_path, 'wb') as target:
        # Have the raw stream decode the content before it is copied.
        reply.raw.decode_content = True
        shutil.copyfileobj(reply.raw, target)

Source File: middlewares.py From Douban_Crawler with GNU General Public License v3.0

5 votes

def process_request(self,request, spider):
        """Print a random User-Agent and set it on the request if none is set.

        Args:
            request: Request whose ``headers`` receive the ``User-Agent``
                default.
            spider: Not used by this method.
        """
        user_agent = UserAgent()
        ua = user_agent.random
        if ua:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3; the bare print statement is Python 2 only.
            print("********Current UserAgent:%s************" % ua)
            # setdefault keeps a User-Agent that is already present.
            request.headers.setdefault('User-Agent', ua)

Source File: isp_data_pollution.py From isp-data-pollution with MIT License

5 votes

def get_useragents(self):
        """Create ``self.fake_ua``, trying up to five times before exiting.

        ``FakeUserAgentError`` and ``URLError`` are retried, and reported
        when ``self.debug`` is truthy. After five failed attempts a message
        is printed and the process exits with status 1.
        """
        attempt = 0
        while attempt < 5:
            try:
                self.fake_ua = fake_ua.UserAgent()
                return
            except (fake_ua.errors.FakeUserAgentError,urllib.error.URLError) as e:
                if self.debug: print(f'.UserAgent exception #{attempt}:\n{e}')
            attempt += 1

        # Reached only when every attempt raised.
        print('Too many .UserAgent failures. Exiting.')
        sys.exit(1)

Source File: weiboEmoji.py From DecryptLogin with MIT License

5 votes

def __init__(self, username, password, **kwargs):
		"""Store default headers and working directory, then log in.

		Args:
			username: Passed to ``weiboEmoji.login``.
			password: Passed to ``weiboEmoji.login``.
			**kwargs: Accepted but not used here.
		"""
		default_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
		self.headers = {'user-agent': default_agent}
		self.cur_path = os.getcwd()
		# UserAgent object kept on the instance for later use.
		self.ua = UserAgent()
		self.session = weiboEmoji.login(username, password)

Source File: helpers.py From NBAsh with MIT License

5 votes

def get_header():
    """Return a random User-Agent using ``agent.json`` in the working directory.

    The path handed to ``UserAgent`` is the current working directory
    followed by ``/agent.json``.
    """
    agent_source = fake_useragent.UserAgent(path=os.getcwd() + '/agent.json')
    return agent_source.random

Source File: request.py From dart-fss with MIT License

5 votes

def get_user_agent():
    """ Build a Chrome-style user-agent

    Returns
    -------
    str
        ``UserAgent().chrome`` converted with ``str``
    """
    return str(UserAgent().chrome)

Source File: request.py From dart-fss with MIT License

5 votes

def update_user_agent(self, force: bool = False):
        """ Replace the session's ``user-agent`` header

        Parameters
        ----------
        force: bool
            When true a Chrome user-agent is generated here directly;
            otherwise the value comes from ``get_user_agent()``
        """
        if not force:
            user_agent = get_user_agent()
        else:
            user_agent = str(UserAgent().chrome)
        self.s.headers.update({'user-agent': user_agent})

Source File: middlewares.py From JobSpiders with Apache License 2.0

5 votes

def __init__(self, crawler):
        """Create the UserAgent source and read which agent type to use.

        Args:
            crawler: Object whose ``settings`` provide ``RANDOM_UA_TYPE``;
                ``'random'`` is used when the setting is absent.
        """
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        # Read RANDOM_UA_TYPE from the settings.
        crawler_settings = crawler.settings
        self.ua_type = crawler_settings.get('RANDOM_UA_TYPE', 'random')

Source File: requests_html.py From requests-html with MIT License

5 votes

def user_agent(style=None) -> _UserAgent:
    """Return a User-Agent string for ``style``, or the default one.

    With a falsy ``style`` the module's ``DEFAULT_USER_AGENT`` is returned.
    Otherwise the module-level ``useragent`` object is created on first use
    and indexed with ``style``.
    """
    global useragent
    if not style:
        return DEFAULT_USER_AGENT

    # Build the shared UserAgent object only when a style is first requested.
    if not useragent:
        useragent = UserAgent()
    return useragent[style]

Source File: module_resolvers.py From simplydomain with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self):
        """
        Run the RequestsHelpers initializer, then set up a UserAgent
        object and empty nameserver lists.
        """
        module_helpers.RequestsHelpers.__init__(self)
        self.ua = UserAgent()
        # Both lists start empty.
        self.nameservers, self.nameserver_ips = [], []

Source File: module_helpers.py From simplydomain with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self):
        """
        Run the CoreOutput initializer and attach a UserAgent object.
        """
        # TODO: Init Logging class
        core_output.CoreOutput.__init__(self)
        agent_source = UserAgent()
        self.ua = agent_source

Source File: request_handler.py From Raccoon with MIT License

5 votes

def _set_headers():
        """Return the default requests headers with a random User-Agent.

        The User-Agent comes from ``UserAgent(verify_ssl=False).random``.
        """
        default_headers = requests_utils.default_headers()
        random_agent = UserAgent(verify_ssl=False).random
        default_headers["User-Agent"] = random_agent
        return default_headers

Python fake_useragent.UserAgent() Examples