Python urllib.request.HTTPCookieProcessor() Examples
The following are 22 code examples of urllib.request.HTTPCookieProcessor().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module
urllib.request, or try the search function.
Example #1
Source File: base.py From WordQuery with GNU General Public License v3.0 | 10 votes |
def __init__(self):
    """Initialize the web service with a per-service cache and a
    cookie-preserving URL opener."""
    super(WebService, self).__init__()
    # Two-level dictionary cache; outer and inner levels auto-create.
    self.cache = defaultdict(defaultdict)
    jar = CookieJar()
    self._cookie = jar
    # The opener carries the jar so cookies persist across requests.
    self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    # Minimum delay (seconds) between consecutive queries.
    self.query_interval = 1
Example #2
Source File: DLInfos.py From iqiyi-parser with MIT License | 7 votes |
def __request__(self):
    """Open self.url once through a cookie-aware opener.

    Returns a tuple: (response, cookies) on success, or
    (exception, None) on any failure.
    """
    jar = CookieJar()
    opener = build_opener(HTTPCookieProcessor(jar))
    merged_headers = dict(self.headers.items())
    if self.cookie:
        # An explicit cookie string takes precedence over the jar.
        merged_headers.update({'Cookie': self.cookie})
    req = Request(self.url, headers=merged_headers, origin_req_host=self.host)
    res = None
    try:
        res = opener.open(req)
    except HTTPError as e:
        return e, None
    except (socket.timeout, URLError) as e:
        return e, None
    except Exception as e:
        # Unexpected failure: log the traceback, then report it like the rest.
        traceback.print_exc()
        return e, None
    return res, jar._cookies
Example #3
Source File: web_utils.py From DLink_Harvester with GNU General Public License v3.0 | 6 votes |
def cookie_friendly_download(referer_url, file_url, store_dir='.', timeout=1000):
    """Download *file_url* into *store_dir*, first visiting *referer_url* so
    that any session cookies it sets accompany the file request.

    The file name is taken from the response's Content-Disposition header.
    Returns the path of the saved file.
    """
    from http.cookiejar import CookieJar
    from urllib import request
    cj = CookieJar()
    cp = request.HTTPCookieProcessor(cj)
    opener = request.build_opener(cp)
    # Prime the cookie jar with whatever the referer page sets.
    with opener.open(referer_url) as fin:
        fin.headers.items()
    import os
    from os import path
    with opener.open(file_url, timeout=timeout) as fin:
        file_bin = fin.read()
        # e.g. 'attachment; filename="report.pdf"'
        filename = fin.headers['Content-Disposition']
        filename = filename.split(';')[-1].split('=')[1]
        # SECURITY FIX: the header is untrusted input. Strip optional quotes
        # and drop any directory components so a malicious server cannot
        # write outside store_dir via '../' in the file name.
        filename = path.basename(filename.strip().strip('"').strip("'"))
        os.makedirs(store_dir, exist_ok=True)
        target = path.join(store_dir, filename)
        with open(target, mode='wb') as fout:
            fout.write(file_bin)
        return target
Example #4
Source File: tweet_operation.py From twitter-intelligence with MIT License | 6 votes |
def get_json_response(tweet_criteria, refresh_cursor, cookiejar, proxy):
    # NOTE(review): Python 2 code — uses urllib2 and a print statement below.
    # Fetch one page of Twitter's search-timeline endpoint as parsed JSON.
    #   tweet_criteria:  object whose optional attributes (username, query,
    #                    since, until, topTweets) build the search string
    #   refresh_cursor:  pagination cursor from the previous page ('' first time)
    #   cookiejar:       shared CookieJar so the session persists across pages
    #   proxy:           optional 'host:port' used for both http and https
    url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&max_position=%s"
    url_data = ''
    # Assemble Twitter advanced-search operators from the criteria object.
    if hasattr(tweet_criteria, 'username'):
        url_data += ' from:' + tweet_criteria.username
    if hasattr(tweet_criteria, 'query'):
        url_data += ' ' + tweet_criteria.query
    if hasattr(tweet_criteria, 'since'):
        url_data += ' since:' + tweet_criteria.since
    if hasattr(tweet_criteria, 'until'):
        url_data += ' until:' + tweet_criteria.until
    if hasattr(tweet_criteria, 'topTweets'):
        if tweet_criteria.topTweets:
            # "Top" results use the endpoint without the f=tweets filter.
            url = "https://twitter.com/i/search/timeline?q=%s&src=typd&max_position=%s"
    url = url % (urllib.parse.quote(url_data), urllib.parse.quote(refresh_cursor))
    # Browser-like headers; the endpoint rejects requests without them.
    headers = [
        ('Host', "twitter.com"),
        ('User-Agent', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"),
        ('Accept', "application/json, text/javascript, */*; q=0.01"),
        ('Accept-Language', "de,en-US;q=0.7,en;q=0.3"),
        ('X-Requested-With', "XMLHttpRequest"),
        ('Referer', url),
        ('Connection', "keep-alive")
    ]
    if proxy:
        opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy, 'https': proxy}), urllib2.HTTPCookieProcessor(cookiejar))
    else:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
    opener.addheaders = headers
    try:
        response = opener.open(url)
        json_response = response.read()
    except:
        # Any failure (rate limit, network error, page-layout change) aborts
        # the whole run.  NOTE(review): bare except + sys.exit() also hides
        # programming errors — consider narrowing the exception type.
        print "Twitter weird response. Try to see on browser: https://twitter.com/search?q=%s&src=typd" % urllib.parse.quote(
            url_data)
        sys.exit()
        return
    data = json.loads(json_response)
    return data
Example #5
Source File: scholar.py From dblp with MIT License | 6 votes |
def __init__(self):
    """Set up an empty querier with a persistent cookie jar and opener."""
    self.articles = []
    self.query = None
    self.cjar = MozillaCookieJar()
    # Reuse cookies from a previous session when a jar file is configured
    # and already exists on disk.
    if ScholarConf.COOKIE_JAR_FILE and \
            os.path.exists(ScholarConf.COOKIE_JAR_FILE):
        try:
            self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
            ScholarUtils.log('info', 'loaded cookies file')
        except Exception as msg:
            ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
            # Fall back to a pristine jar rather than a half-loaded one.
            self.cjar = MozillaCookieJar()
    self.opener = build_opener(HTTPCookieProcessor(self.cjar))
    # Last settings object applied to Scholar, if any.
    self.settings = None
Example #6
Source File: tokenresolver.py From plugin.video.vrt.nu with GNU General Public License v3.0 | 5 votes |
def _get_fresh_token(self, refresh_token, name):
    """Refresh an expired X-VRT-Token, vrtlogin-at or vrtlogin-rt token."""
    refresh_url = self._TOKEN_GATEWAY_URL + '/refreshtoken?legacy=true'
    # The refresh token is presented as a cookie, not as a POST body.
    headers = {'Cookie': 'vrtlogin-rt=' + refresh_token}
    jar = cookielib.CookieJar()
    opener = build_opener(HTTPCookieProcessor(jar), ProxyHandler(self._proxies))
    log(2, 'URL get: {url}', url=refresh_url)
    opener.open(Request(refresh_url, headers=headers))
    # The refreshed token arrives as a cookie in the jar.
    return TokenResolver._create_token_dictionary(jar, name)
Example #7
Source File: __init__.py From odoorpc with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(
    self,
    host,
    port=8069,
    timeout=120,
    version=None,
    deserialize=True,
    opener=None,
):
    """Create a JSON-RPC connector for an Odoo server.

    A single cookie-aware URL opener is shared between the JSON and HTTP
    proxies so the session cookie follows every request.
    """
    super(ConnectorJSONRPC, self).__init__(host, port, timeout, version)
    self.deserialize = deserialize
    if opener is None:
        jar = CookieJar()
        opener = build_opener(HTTPCookieProcessor(jar))
    self._opener = opener
    self._proxy_json, self._proxy_http = self._get_proxies()
Example #8
Source File: tokenresolver.py From plugin.video.vrt.nu with GNU General Public License v3.0 | 5 votes |
def _get_usertoken(self, name=None, login_json=None):
    """Get a user X-VRT-Token, vrtlogin-at, vrtlogin-expiry, vrtlogin-rt, SESSION, OIDCXSRF or state token"""
    if not login_json:
        login_json = self._get_login_json()
    cookiejar = cookielib.CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookiejar), ProxyHandler(self._proxies))
    log(2, 'URL get: {url}', url=unquote(self._USER_TOKEN_GATEWAY_URL))
    # First GET seeds the jar with, among others, the OIDCXSRF cookie
    # that must be echoed back in the login POST below.
    opener.open(self._USER_TOKEN_GATEWAY_URL)
    xsrf = next((cookie for cookie in cookiejar if cookie.name == 'OIDCXSRF'), None)
    if xsrf is None:
        # Without the CSRF cookie the login POST would be rejected.
        return None
    # Credentials come from the earlier Gigya login response (login_json).
    payload = dict(
        UID=login_json.get('UID'),
        UIDSignature=login_json.get('UIDSignature'),
        signatureTimestamp=login_json.get('signatureTimestamp'),
        client_id='vrtnu-site',
        _csrf=xsrf.value
    )
    data = urlencode(payload).encode()
    log(2, 'URL post: {url}', url=unquote(self._VRT_LOGIN_URL))
    # The POST response sets the token cookies in the same jar.
    opener.open(self._VRT_LOGIN_URL, data=data)
    # Cache additional tokens for later use
    refreshtoken = TokenResolver._create_token_dictionary(cookiejar, cookie_name='vrtlogin-rt')
    accesstoken = TokenResolver._create_token_dictionary(cookiejar, cookie_name='vrtlogin-at')
    if refreshtoken is not None:
        from json import dumps
        cache_file = self._get_token_filename('vrtlogin-rt')
        update_cache(cache_file, dumps(refreshtoken), self._TOKEN_CACHE_DIR)
    if accesstoken is not None:
        from json import dumps
        cache_file = self._get_token_filename('vrtlogin-at')
        update_cache(cache_file, dumps(accesstoken), self._TOKEN_CACHE_DIR)
    # Finally return the token the caller actually asked for (by name).
    return TokenResolver._create_token_dictionary(cookiejar, name)
Example #9
Source File: common.py From iqiyi-parser with MIT License | 5 votes |
def initOpener(self):
    """Create a cookie-aware opener preloaded with this object's headers."""
    self.cookiejar = CookieJar()
    handler = HTTPCookieProcessor(self.cookiejar)
    self.opener = build_opener(handler)
    # addheaders expects a list of (name, value) pairs.
    self.opener.addheaders = [item for item in self.headers.items()]
Example #10
Source File: base.py From FastWordQuery with GNU General Public License v3.0 | 5 votes |
def __init__(self):
    """Initialize the web service with a cookie-preserving opener."""
    super(WebService, self).__init__()
    jar = CookieJar()
    self._cookie = jar
    # Cookies collected in the jar are replayed on subsequent requests.
    self._opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(jar))
    # Minimum delay (seconds) between consecutive queries.
    self.query_interval = 1.0
Example #11
Source File: html_downloader.py From SmallReptileTraining with MIT License | 5 votes |
def download(self, url, retry_count=3, headers=None, proxy=None, data=None):
    """Fetch *url* and return the response body as bytes, or None.

    :param url: target URL; None short-circuits to None
    :param retry_count: remaining retries for HTTP 5xx server errors
    :param headers: optional dict of request headers
    :param proxy: optional proxy address for the URL's scheme
    :param data: optional POST body
    """
    if url is None:
        return None
    try:
        # Guard against headers=None, which urllib's Request cannot accept.
        req = request.Request(url, headers=headers or {}, data=data)
        cookie = cookiejar.CookieJar()
        cookie_process = request.HTTPCookieProcessor(cookie)
        # BUG FIX: the cookie handler was built but never registered with
        # the opener, so cookies were silently dropped.
        opener = request.build_opener(cookie_process)
        if proxy:
            proxies = {urlparse(url).scheme: proxy}
            opener.add_handler(request.ProxyHandler(proxies))
        content = opener.open(req).read()
    except error.URLError as e:
        print('HtmlDownLoader download error:', e.reason)
        content = None
        if retry_count > 0:
            # An HTTPError with a 5xx code means a server-side failure that
            # may be transient, so retry the download.
            if hasattr(e, 'code') and 500 <= e.code < 600:
                return self.download(url, retry_count - 1, headers, proxy, data)
    return content
Example #12
Source File: spider_main.py From SmallReptileTraining with MIT License | 5 votes |
def create_cookie_opener(self):
    """Enable cookie handling.

    :return: a custom opener that stores cookies in a fresh CookieJar
    """
    jar = cookiejar.CookieJar()
    processor = request.HTTPCookieProcessor(jar)
    return request.build_opener(processor)
Example #13
Source File: LuoguBrowser.py From LuoguCrawler with MIT License | 5 votes |
def setOpener(self):
    """Initialize self.opener with cookie handling and default headers."""
    jar = cookiejar.CookieJar()
    processor = request.HTTPCookieProcessor(jar)
    self.opener = request.build_opener(processor)
    # Convert the header mapping to the (name, value) pair list that
    # OpenerDirector.addheaders expects.
    self.opener.addheaders = [(k, v) for k, v in self._headers.items()]
Example #14
Source File: browser.py From Hatkey with GNU General Public License v3.0 | 5 votes |
def __init__(self):
    """Start a fresh browser session pointed at the local test server."""
    jar = CookieJar()
    self.cookiejar = jar
    self._cookie_processor = HTTPCookieProcessor(jar)
    self.form = None
    # Default target: a locally-running app on port 8080.
    self.url = "http://0.0.0.0:8080/"
    self.path = "/"
    # Populated after the first request.
    self.status = None
    self.data = None
    self._response = None
    self._forms = None
Example #15
Source File: jsonrpclib.py From odoorpc with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, host, port, timeout=120, ssl=False, opener=None):
    """Initialize the JSON-RPC endpoint wrapper for host:port.

    When no opener is supplied, build one with cookie support so the
    server session is preserved between calls.
    """
    scheme = "https://" if ssl else "http://"
    self._root_url = "{http}{host}:{port}".format(http=scheme, host=host, port=port)
    self._timeout = timeout
    self._builder = URLBuilder(self)
    self._opener = opener
    if not opener:
        jar = CookieJar()
        self._opener = build_opener(HTTPCookieProcessor(jar))
Example #16
Source File: nse.py From nsetools with MIT License | 5 votes |
def nse_opener(self):
    """Build the opener used for NSE requests.

    :return: an OpenerDirector that stores cookies in a fresh CookieJar
    """
    jar = CookieJar()
    processor = HTTPCookieProcessor(jar)
    return build_opener(processor)
Example #17
Source File: httpbot.py From humblebundle with GNU General Public License v3.0 | 5 votes |
def __init__(self, base_url="", tag="", cookiejar=None, debug=False):
    """Create an HTTP bot: an opener with optional debug tracing and cookies."""
    self.tag = tag
    level = 1 if debug else 0
    http_handler = urllib2.HTTPHandler(debuglevel=level)
    https_handler = urllib2.HTTPSHandler(debuglevel=level)
    cookie_handler = urllib2.HTTPCookieProcessor(cookiejar)
    self._opener = urllib2.build_opener(http_handler, https_handler, cookie_handler)
    # Normalize the base URL: default the scheme to http, and treat a bare
    # "host/path" (no scheme) as netloc + path rather than all path.
    scheme, netloc, path, q, f = urlparse.urlsplit(base_url, "http")
    if not netloc:
        netloc, _, path = path.partition('/')
    self.base_url = urlparse.urlunsplit((scheme, netloc, path, q, f))
Example #18
Source File: jsonrpclib.py From odoorpc with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, host, port, timeout=120, ssl=False, opener=None):
    """Initialize the JSON-RPC endpoint wrapper for host:port.

    Builds a cookie-aware default opener when none is supplied, so the
    server session cookie follows every request.
    """
    self._root_url = "{http}{host}:{port}".format(
        http=("https://" if ssl else "http://"),
        host=host,
        port=port,
    )
    self._timeout = timeout
    self._builder = URLBuilder(self)
    self._opener = opener
    if not opener:
        cookie_jar = CookieJar()
        self._opener = build_opener(HTTPCookieProcessor(cookie_jar))
Example #19
Source File: asf_template.py From esa_sentinel with MIT License | 4 votes |
def get_new_cookie(self):
    """Prompt for Earthdata (URS4) credentials and obtain a fresh login cookie.

    On success the cookie jar is saved to self.cookie_jar_path and True is
    returned.  A rejected username/password (HTTP 401) returns False so the
    caller can re-prompt.  Any other authentication or connectivity failure
    exits the process.
    """
    # Start by prompting user to input their credentials
    # Another Python2/3 workaround: raw_input only exists on Python 2.
    try:
        new_username = raw_input("Username: ")
    except NameError:
        new_username = input("Username: ")
    new_password = getpass.getpass(prompt="Password (will not be displayed): ")

    # Build URS4 Cookie request
    auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \
                      self.asf_urs4['redir'] + '&response_type=code&state='
    try:
        # python2: bytes() is str() and takes a single argument
        user_pass = base64.b64encode(bytes(new_username + ":" + new_password))
    except TypeError:
        # python3: bytes() needs an explicit encoding, and the result must be
        # decoded back to str for the Authorization header.
        user_pass = base64.b64encode(bytes(new_username + ":" + new_password, "utf-8"))
        user_pass = user_pass.decode("utf-8")

    # Authenticate against URS, grab all the cookies
    self.cookie_jar = MozillaCookieJar()
    opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context))
    request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)})

    # Watch out cookie rejection!
    try:
        response = opener.open(request)
    except HTTPError as e:
        if e.code == 401:
            print(" > Username and Password combo was not successful. Please try again.")
            return False
        else:
            # If an error happens here, the user most likely has not confirmed EULA.
            print("\nIMPORTANT: There was an error obtaining a download cookie!")
            print("Your user appears to lack permission to download data from the ASF Datapool.")
            print(
                "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
            exit(-1)
    except URLError as e:
        print("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ")
        print("Try cookie generation later.")
        exit(-1)

    # Did we get a cookie?
    if self.check_cookie_is_logged_in(self.cookie_jar):
        # COOKIE SUCCESS! Persist the jar for later runs.
        self.cookie_jar.save(self.cookie_jar_path)
        return True

    # if we aren't successful generating the cookie, nothing will work. Stop here!
    print("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.")
    print("Response was {0}.".format(response.getcode()))
    print(
        "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
    exit(-1)
# make sure we're logged into URS
Example #20
Source File: asf_template.py From esa_sentinel with MIT License | 4 votes |
def check_cookie(self):
    """Validate the current cookie jar against a known URS-protected URL.

    Returns True when the saved cookie still grants access, False when it
    is missing/invalid, and exits the process when the server indicates
    the user lacks download permissions.
    """
    if self.cookie_jar is None:
        print(" > Cookiejar is bunk: {0}".format(self.cookie_jar))
        return False

    # File we know is valid, used to validate the cookie
    file_check = 'https://urs.earthdata.nasa.gov/profile'

    # Apply custom Redirect Handler
    opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context))
    install_opener(opener)

    # Attempt a HEAD request
    request = Request(file_check)
    request.get_method = lambda: 'HEAD'
    try:
        print(" > attempting to download {0}".format(file_check))
        response = urlopen(request, timeout=30)
        resp_code = response.getcode()
        # Make sure we're logged in
        if not self.check_cookie_is_logged_in(self.cookie_jar):
            return False

        # Save cookiejar so a still-valid cookie is refreshed on disk
        self.cookie_jar.save(self.cookie_jar_path)

    except HTTPError:
        # If we get this error, again, it likely means the user has not agreed
        # to the current EULA.
        print("\nIMPORTANT: ")
        print("Your user appears to lack permissions to download data from the ASF Datapool.")
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
        exit(-1)

    # These return codes indicate the USER has not been approved to download the data
    if resp_code in (300, 301, 302, 303):
        try:
            # python2 response objects expose getheader via .info()
            redir_url = response.info().getheader('Location')
        except AttributeError:
            # python3 responses expose getheader directly
            redir_url = response.getheader('Location')

        # Funky Test env: the retired-vertex redirect is expected there.
        if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']):
            print("Cough, cough. It's dusty in this test env!")
            return True

        print("Redirect ({0}) occured, invalid cookie value!".format(resp_code))
        return False

    # These are successes!
    if resp_code in (200, 307):
        return True

    return False
Example #21
Source File: net_utils.py From NSEToolsPy with MIT License | 4 votes |
def __opener__():
    """Build the opener for the url.

    :returns: opener object backed by a fresh CookieJar
    """
    jar = CookieJar()
    handler = HTTPCookieProcessor(jar)
    return build_opener(handler)
Example #22
Source File: client.py From bugatsinho.github.io with GNU General Public License v3.0 | 4 votes |
def cfcookie(netloc, ua, timeout):
    # NOTE(review): Python 2 code (urllib2, urlparse, cookielib).
    """Solve the legacy Cloudflare "jschl" IUAM challenge for *netloc* and
    return the resulting clearance cookies as one 'name=value; ...' string.

    Returns None implicitly on any failure (errors are swallowed).
    """
    try:
        headers = {'User-Agent': ua}
        req = urllib2.Request(netloc, headers=headers)
        try:
            urllib2.urlopen(req, timeout=int(timeout))
        except urllib2.HTTPError as response:
            # Cloudflare answers the first request with an error page that
            # embeds the JavaScript challenge; read at most 5 MB of it.
            result = response.read(5242880)
        # Extract the challenge token, the seed expression, and the
        # expression builder from the page's inline JavaScript.
        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]
        init = re.findall('setTimeout\(function\(\){\s*.*?.*:(.*?)};', result)[-1]
        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]
        decryptVal = parseJSString(init)
        lines = builder.split(';')
        # Replay the challenge arithmetic: each statement mutates the value
        # with +=, -=, *= etc. (the operator is the last char of the lhs).
        # NOTE(review): eval() on remote page content is a code-injection
        # risk — acceptable here only because the input is already parsed
        # through parseJSString, but worth flagging.
        for line in lines:
            if len(line) > 0 and '=' in line:
                sections = line.split('=')
                line_val = parseJSString(sections[1])
                decryptVal = int(eval(str(decryptVal) + str(sections[0][-1]) + str(line_val)))
        # Cloudflare's expected answer adds the hostname length.
        answer = decryptVal + len(urlparse.urlparse(netloc).netloc)
        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (netloc, jschl, answer)
        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer
            )
        # Cloudflare requires a ~5 second delay before the answer is accepted.
        time.sleep(5)
        cookies = cookielib.LWPCookieJar()
        handlers = [urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies)]
        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)
        try:
            # Submit the answer; the clearance cookie lands in `cookies`.
            req = urllib2.Request(query, headers=headers)
            urllib2.urlopen(req, timeout=int(timeout))
        except BaseException:
            pass
        cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        return cookie
    except BaseException:
        # Best-effort: any failure yields None.
        pass