Python Examples of pycurl.USERAGENT

Source File: files.py From docassemble with MIT License

6 votes

def fetch_url(self, url, **kwargs):
        filename = kwargs.get('filename', self.filename)
        self.fix()
        cookiefile = tempfile.NamedTemporaryFile(suffix='.txt')
        f = open(os.path.join(self.directory, filename), 'wb')
        c = pycurl.Curl()
        c.setopt(c.URL, url)
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.WRITEDATA, f)
        c.setopt(pycurl.USERAGENT, 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36')
        c.setopt(pycurl.COOKIEFILE, cookiefile.name)
        c.perform()
        c.close()
        cookiefile.close()
        self.save()
        return

Source File: Checkpoint.py From Instagram-API with MIT License

5 votes

def request(self, endpoint, headers=None, post=None, first=True):
        buffer = BytesIO()

        ch = pycurl.Curl()

        ch.setopt(pycurl.URL, endpoint)
        ch.setopt(pycurl.USERAGENT, self.userAgent)
        ch.setopt(pycurl.WRITEFUNCTION, buffer.write)
        ch.setopt(pycurl.FOLLOWLOCATION, True)
        ch.setopt(pycurl.HEADER, True)
        if headers:
            ch.setopt(pycurl.HTTPHEADER, headers)

        ch.setopt(pycurl.VERBOSE, self.debug)
        ch.setopt(pycurl.SSL_VERIFYPEER, False)
        ch.setopt(pycurl.SSL_VERIFYHOST, False)
        ch.setopt(pycurl.COOKIEFILE, self.settingsPath + self.username + '-cookies.dat')
        ch.setopt(pycurl.COOKIEJAR, self.settingsPath + self.username + '-cookies.dat')

        if post:
            import urllib
            ch.setopt(pycurl.POST, len(post))
            ch.setopt(pycurl.POSTFIELDS, urllib.urlencode(post))

        ch.perform()
        resp = buffer.getvalue()
        header_len = ch.getinfo(pycurl.HEADER_SIZE)
        header = resp[0: header_len]
        body = resp[header_len:]
        ch.close()

        if self.debug:
            import urllib
            print("REQUEST: " + endpoint)
            if post is not None:
                if not isinstance(post, list):
                    print('DATA: ' + urllib.unquote_plus(json.dumps(post)))
            print("RESPONSE: " + body + "\n")

        return [header, json_decode(body)]

Source File: core.py From docassemble with MIT License

5 votes

def from_url(self, url):
        """Makes the contents of the file the contents of the given URL."""
        self.retrieve()
        cookiefile = tempfile.NamedTemporaryFile(suffix='.txt')
        the_path = self.file_info['path']
        f = open(the_path, 'wb')
        c = pycurl.Curl()
        c.setopt(c.URL, url)
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.WRITEDATA, f)
        c.setopt(pycurl.USERAGENT, docassemble.base.functions.server.daconfig.get('user agent', 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36'))
        c.setopt(pycurl.COOKIEFILE, cookiefile.name)
        c.perform()
        c.close()
        self.retrieve()

Source File: pycurllib.py From BitTorrent with GNU General Public License v3.0

5 votes

def __init__(self, url):
        self.c = pycurl.Curl()
        self.headers = {}
        self.c.setopt(self.c.URL, url)

        if use_cert_authority:
            self.c.setopt(pycurl.CAINFO, CA_PATH)
            self.c.setopt(pycurl.CAPATH, CA_PATH)
            #self.c.setopt(pycurl.CA_BUNDLE, CA_PATH)
        else:
            self.c.setopt(pycurl.SSL_VERIFYHOST, 0)
            self.c.setopt(pycurl.SSL_VERIFYPEER, 0)

        if http_bindaddr:
            self.c.setopt(self.c.INTERFACE, http_bindaddr)

        if user_agent:
            self.c.setopt(pycurl.USERAGENT, user_agent)

        if use_compression:
            if _pycurl_compression:
                # If a zero-length string is set, then an Accept-Encoding header
                # containing all supported encodings is sent.
                self.c.setopt(pycurl.ENCODING, "")
            # someday, gzip manually with GzipFile
            #else:
            #    self.add_header("Accept-Encoding", "gzip")

        if timeout:
            self.c.setopt(self.c.TIMEOUT, timeout)
        if connect_timeout:
            self.c.setopt(self.c.CONNECTTIMEOUT, timeout)
        if max_connects:
            self.c.setopt(self.c.MAXCONNECTS, max_connects)

Source File: pycurllib.py From BitTorrent with GNU General Public License v3.0

5 votes

def __init__(self, url):
        self.c = pycurl.Curl()
        self.headers = {}
        self.c.setopt(self.c.URL, url)

        if use_cert_authority:
            self.c.setopt(pycurl.CAINFO, CA_PATH)
            self.c.setopt(pycurl.CAPATH, CA_PATH)
            #self.c.setopt(pycurl.CA_BUNDLE, CA_PATH)
        else:
            self.c.setopt(pycurl.SSL_VERIFYHOST, 0)
            self.c.setopt(pycurl.SSL_VERIFYPEER, 0)

        if http_bindaddr:
            self.c.setopt(self.c.INTERFACE, http_bindaddr)

        if user_agent:
            self.c.setopt(pycurl.USERAGENT, user_agent)

        if use_compression:
            if _pycurl_compression:
                # If a zero-length string is set, then an Accept-Encoding header
                # containing all supported encodings is sent.
                self.c.setopt(pycurl.ENCODING, "")
            # someday, gzip manually with GzipFile
            #else:
            #    self.add_header("Accept-Encoding", "gzip")

        if timeout:
            self.c.setopt(self.c.TIMEOUT, timeout)
        if connect_timeout:
            self.c.setopt(self.c.CONNECTTIMEOUT, timeout)
        if max_connects:
            self.c.setopt(self.c.MAXCONNECTS, max_connects)

Source File: CleanScraper.py From CleanScrape with MIT License

5 votes

def get_url (url, user_agent=UA, referrer=None):
    """Make a GET request of the url using pycurl and return the data
    (which is None if unsuccessful)"""

    data = None
    databuffer = StringIO()

    curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)
    curl.setopt(pycurl.CONNECTTIMEOUT, 5)
    curl.setopt(pycurl.TIMEOUT, 8)
    curl.setopt(pycurl.WRITEFUNCTION, databuffer.write)
    curl.setopt(pycurl.COOKIEFILE, '')
    if user_agent:
        curl.setopt(pycurl.USERAGENT, user_agent)
    if referrer is not None:
        curl.setopt(pycurl.REFERER, referrer)
    try:
        curl.perform()
        data = databuffer.getvalue()
    except Exception:
        pass
    curl.close()

    return data

Source File: patator_ext.py From project-black with GNU General Public License v2.0

5 votes

def connect(self, host, port, scheme):
    fp = pycurl.Curl()
    fp.setopt(pycurl.SSL_VERIFYPEER, 0)
    fp.setopt(pycurl.SSL_VERIFYHOST, 0)
    fp.setopt(pycurl.HEADER, 1)
    fp.setopt(pycurl.USERAGENT, 'Mozilla/5.0')
    fp.setopt(pycurl.NOSIGNAL, 1)

    return TCP_Connection(fp)

Source File: vt-checker-hosts.py From Loki with GNU General Public License v3.0

5 votes

def download_url(host_id, url):
    """
    Downloads an URL and stores the response to a directory with named as the host/IP
    :param host_id:
    :param url:
    :return:
    """
    output = StringIO.StringIO()
    header = StringIO.StringIO()

    print "[>] Trying to download URL: %s" % url
    # Download file
    try:
        # 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)')
        c = pycurl.Curl()
        c.setopt(c.URL, url)
        c.setopt(pycurl.CONNECTTIMEOUT, 10)
        c.setopt(pycurl.TIMEOUT, 180)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)')
        c.setopt(c.WRITEFUNCTION, output.write)
        c.setopt(c.HEADERFUNCTION, header.write)
        c.perform()
        # Header parsing
        header_info = header_function(header.getvalue())
    except Exception, e:
        if args.debug:
            traceback.print_exc()
        print_highlighted("[-] Error MESSAGE: %s" % str(e))

    # Write File

Source File: test_helpers.py From the-new-hotness with GNU Lesser General Public License v2.1

5 votes

def test_get_html_http_no_callbacks(self):
        import pycurl
        import io

        # Test http with no callbacks
        mock_curl_instance = Mock()
        mock_stringio_instance = Mock()

        mock_stringio_instance.getvalue.return_value = "mock StringIO value"

        # Expected pycurl setopt calls
        expected_setopt_calls = [
            call(pycurl.URL, b"https://example.com/file.extension"),
            call(pycurl.WRITEFUNCTION, mock_stringio_instance.write),
            call(pycurl.FOLLOWLOCATION, 1),
            call(pycurl.MAXREDIRS, 10),
            call(
                pycurl.USERAGENT,
                "Fedora Upstream Release Monitoring "
                "(https://fedoraproject.org/wiki/Upstream_release_monitoring)",
            ),
            call(pycurl.CONNECTTIMEOUT, 10),
            call(pycurl.TIMEOUT, 30),
        ]

        with patch.object(pycurl, "Curl") as mock_curl_constructor:
            with patch.object(io, "StringIO") as mock_stringio_constructor:
                mock_curl_constructor.return_value = mock_curl_instance
                mock_stringio_constructor.return_value = mock_stringio_instance

                get_html_result = helpers.get_html("https://example.com/file.extension")

        # Check curl calls
        mock_curl_instance.setopt.assert_has_calls(expected_setopt_calls)
        mock_curl_instance.perform.assert_called_once_with()
        mock_curl_instance.close.assert_called_once_with()

        self.assertEqual(get_html_result, "mock StringIO value")

Source File: test_fetch.py From landscape-client with GNU General Public License v2.0

5 votes

def test_pycurl_user_agent(self):
        """If provided, the user-agent is set in the request."""
        curl = CurlStub(b"result")
        result = fetch(
            "http://example.com", curl=curl, user_agent="user-agent")
        self.assertEqual(result, b"result")
        self.assertEqual(b"user-agent", curl.options[pycurl.USERAGENT])

Source File: restClient.py From recipebook with MIT License

5 votes

def get(url, encoding, user_agent=UA, referrer=None):
    """Make a GET request of the url using pycurl and return the data
    (which is None if unsuccessful)"""

    data = None
    databuffer = BytesIO()

    curl = pycurl.Curl()
    curl.setopt(pycurl.URL, url)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)
    curl.setopt(pycurl.CONNECTTIMEOUT, 5)
    curl.setopt(pycurl.TIMEOUT, 8)
    curl.setopt(pycurl.WRITEDATA, databuffer)
    curl.setopt(pycurl.COOKIEFILE, '')
    if user_agent:
        curl.setopt(pycurl.USERAGENT, user_agent)
    if referrer is not None:
        curl.setopt(pycurl.REFERER, referrer)
    try:
        curl.perform()
        data = databuffer.getvalue().decode(encoding)
    except Exception:
        pass
    curl.close()

    return data

Source File: patator.py From patator with GNU General Public License v2.0

5 votes

def connect(self, host, port, scheme):
    fp = pycurl.Curl()
    fp.setopt(pycurl.SSL_VERIFYPEER, 0)
    fp.setopt(pycurl.SSL_VERIFYHOST, 0)
    fp.setopt(pycurl.HEADER, 1)
    fp.setopt(pycurl.USERAGENT, 'Mozilla/5.0')
    fp.setopt(pycurl.NOSIGNAL, 1)

    return TCP_Connection(fp)

Source File: InstagramRegistration.py From Instagram-API with MIT License

5 votes

def request(self, endpoint, post=None):
        buffer = BytesIO()

        ch = pycurl.Curl()
        ch.setopt(pycurl.URL, Constants.API_URL + endpoint)
        ch.setopt(pycurl.USERAGENT, self.userAgent)
        ch.setopt(pycurl.WRITEFUNCTION, buffer.write)
        ch.setopt(pycurl.FOLLOWLOCATION, True)
        ch.setopt(pycurl.HEADER, True)
        ch.setopt(pycurl.VERBOSE, False)
        ch.setopt(pycurl.COOKIEFILE, os.path.join(self.IGDataPath, self.username, self.username + "-cookies.dat"))
        ch.setopt(pycurl.COOKIEJAR, os.path.join(self.IGDataPath, self.username, self.username + "-cookies.dat"))

        if post is not None:
            ch.setopt(pycurl.POST, True)
            ch.setopt(pycurl.POSTFIELDS, post)

        if self.proxy:
            ch.setopt(pycurl.PROXY, self.proxyHost)
            if self.proxyAuth:
                ch.setopt(pycurl.PROXYUSERPWD, self.proxyAuth)

        ch.perform()
        resp = buffer.getvalue()
        header_len = ch.getinfo(pycurl.HEADER_SIZE)
        header = resp[0: header_len]
        body = resp[header_len:]

        ch.close()

        if self.debug:
            print("REQUEST: " + endpoint)
            if post is not None:
                if not isinstance(post, list):
                    print("DATA: " + str(post))
            print("RESPONSE: " + body)

        return [header, json_decode(body)]

Source File: helpers.py From the-new-hotness with GNU Lesser General Public License v2.1

4 votes

def get_html(url, callback=None, errback=None):
    if url.startswith("ftp://"):
        import urllib

        req = urllib.request.urlopen(url)  # nosec
        data = req.read()
        if callback:
            try:
                for cb in callback:
                    cb(data)
            except TypeError:
                callback(data)
        return data
    else:
        if callback:
            from twisted.web.client import getPage

            df = getPage(url)
            try:
                for cb in callback:
                    df.addCallback(cb)
            except TypeError:
                df.addCallback(callback)

            if errback:
                try:
                    for eb in errback:
                        df.addErrback(eb)
                except TypeError:
                    df.addErrback(errback)
        else:
            import io
            import pycurl

            res = io.StringIO()

            c = pycurl.Curl()
            c.setopt(pycurl.URL, url.encode("ascii"))

            c.setopt(pycurl.WRITEFUNCTION, res.write)
            c.setopt(pycurl.FOLLOWLOCATION, 1)
            c.setopt(pycurl.MAXREDIRS, 10)
            c.setopt(
                pycurl.USERAGENT,
                "Fedora Upstream Release Monitoring "
                "(https://fedoraproject.org/wiki/Upstream_release_monitoring)",
            )
            c.setopt(pycurl.CONNECTTIMEOUT, 10)
            c.setopt(pycurl.TIMEOUT, 30)

            c.perform()
            c.close()

            data = res.getvalue()
            res.close()
            return data

Source File: analyze_packages.py From pyCAF with GNU General Public License v3.0

4 votes

def get_rh_sso(self):
        """
        Function wich used login and password to connect at www.redhat.com and return
        the "rh_sso" proxy used for request authentication.
        """
        self._logger.debug("RHEL rh_sso getting in progress...")
        username = self.config_server.redhat["login_username"]
        password = self.config_server.redhat["login_pass"]
        url = 'https://www.redhat.com/wapps/sso/login.html'
        values={'username':username,'password':password,'_flowId':'legacy-login-flow','redirect':'https://www.redhat.com/wapps/ugc/protected/account.html','failureRedirect':'http://www.redhat.com/wapps/sso/login.html'}
        data_post = urllib.urlencode(values)
      
        headers = Storage()
        
        c = pycurl.Curl()
        c.setopt(pycurl.URL, url)
#==============================================================================
#     DEBUG comments in order to use the Burp Proxy software 
#==============================================================================
    #c.setopt(pycurl.PROXY, '127.0.0.1')
    #c.setopt(pycurl.PROXYPORT, 8080)
    #c.setopt(pycurl.SSL_VERIFYPEER, 1)
    #c.setopt(pycurl.SSL_VERIFYHOST, 2)
    #c.setopt(pycurl.CAINFO, "/home/thierry/Divers/Burp/cacert.crt")
#==============================================================================
        c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0")
        c.setopt(pycurl.HTTPHEADER, ['Accept-Language: en-US,en;q=0.5',
                                     'Connection: keep-alive',
                                     'www.whitehouse.gov: 200'])
        c.setopt(pycurl.POST, 1)
        c.setopt(pycurl.POSTFIELDS, data_post)
        
        c.setopt(c.HEADERFUNCTION, headers.store)
        
        c.perform()
        c.close()
        headers = str(headers)
        
        expression = r"(?P<var1>.*)(?P<rh_sso>Set-Cookie: rh_sso=)(?P<sso_key>(?!\").*?)(?P<end>;)"
        header_lines = headers.split('\n')
        for head in header_lines:
            result_re = re.match(expression, head)
            
            if result_re is not None:
                sso_key = "rh_sso=" + str(result_re.group('sso_key'))
        self._logger.debug("rh_sso value : "+ str(sso_key))
        return sso_key

Source File: download.py From launcher with GNU General Public License v3.0

4 votes

def run(self):
        c = pycurl.Curl()
        c.setopt(pycurl.URL, self.url)
        c.setopt(pycurl.FOLLOWLOCATION, True)
        c.setopt(pycurl.MAXREDIRS, 4)
        #c.setopt(pycurl.NOBODY, 1)

        c.setopt(c.VERBOSE, True)

        #c.setopt(pycurl.CONNECTTIMEOUT, 20)
        
        if self.useragent:
            c.setopt(pycurl.USERAGENT, self.useragent)
        
        # add cookies, if available
        if self.cookies:
            c.setopt(pycurl.COOKIE, self.cookies)
        
        #realurl = c.getinfo(pycurl.EFFECTIVE_URL)
        realurl = self.url
        print("realurl",realurl)
        self.filename = realurl.split("/")[-1].strip()
        c.setopt(pycurl.URL, realurl)
        c.setopt(pycurl.FOLLOWLOCATION, True)
        c.setopt(pycurl.NOPROGRESS, False)
        c.setopt(pycurl.XFERINFOFUNCTION, self.getProgress)
        
        c.setopt(pycurl.SSL_VERIFYPEER, False)
        c.setopt(pycurl.SSL_VERIFYHOST, False)
        
        # configure pycurl output file
        if self.path == False:
            self.path = os.getcwd()
        filepath = os.path.join(self.path, self.filename)
        
         
        if os.path.exists(filepath):## remove old file,restart download 
            os.system("rm -rf " + filepath)
        
        buffer = StringIO() 
        c.setopt(pycurl.WRITEDATA, buffer)
        
        self._dst_path = filepath

        # add cookies, if available
        if self.cookies:
            c.setopt(pycurl.COOKIE, self.cookies)
        
        self._stop = False
        self.progress["stopped"] = False 
        # download file
        try:
            c.perform()
        except pycurl.error, error:
            errno,errstr = error
            print("curl error: %s" % errstr)
            self._errors.append(errstr)
            self._stop = True
            self.progress["stopped"] = True
            self.stop()

Source File: utils.py From APIFuzzer with GNU General Public License v3.0

4 votes

def init_pycurl(debug=False):
    """
    Provides an instances of pycurl with basic configuration
    :return: pycurl instance
    """
    _curl = pycurl.Curl()
    _curl.setopt(pycurl.SSL_OPTIONS, pycurl.SSLVERSION_TLSv1_2)
    _curl.setopt(pycurl.SSL_VERIFYPEER, False)
    _curl.setopt(pycurl.SSL_VERIFYHOST, False)
    _curl.setopt(pycurl.VERBOSE, debug)
    _curl.setopt(pycurl.TIMEOUT, 10)
    _curl.setopt(pycurl.COOKIEFILE, "")
    _curl.setopt(pycurl.USERAGENT, 'APIFuzzer')
    return _curl

Python pycurl.USERAGENT Examples