Python urllib.request.build_opener() Examples
The following are 30 code examples of urllib.request.build_opener(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib.request, or try the search function.
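All of the examples below follow the same basic pattern: construct an opener from one or more handler objects with build_opener(), then call its open() method. Here is a minimal sketch of that pattern (the URL is a placeholder):

import urllib.request

# An empty ProxyHandler({}) disables any system proxy; see Examples 1, 25 and 28.
opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
req = urllib.request.Request('https://example.com',
                             headers={'User-Agent': 'Mozilla/5.0'})
with opener.open(req, timeout=10) as response:
    body = response.read()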
Example #1
Source File: CME1.py From web-scraping with Apache License 2.0 | 7 votes |
# assumed imports: import urllib.request as u; from bs4 import BeautifulSoup as bs
def scrape(category_name, commodity_name):
    # i use proxy handler cuz my uni network runs on its proxy
    # and i cannot authenticate python through the proxy
    # so i use empty proxy to bypass the authentication
    proxy_handler = u.ProxyHandler({})
    opener = u.build_opener(proxy_handler)

    # cme officially forbids scraping
    # so a header must be used for disguise as an internet browser
    # the developers say no to scraping, it appears to be so
    # but actually they turn a blind eye to us, thx

    # i need different types of commodity
    # so i need to format the website for each commodity
    req = u.Request('http://www.cmegroup.com/trading/metals/%s/%s.html' % (
        category_name, commodity_name), headers={'User-Agent': 'Mozilla/5.0'})
    response = opener.open(req)
    result = response.read()
    soup = bs(result, 'html.parser')
    return soup
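A hypothetical call, for illustration only (the category and commodity slugs below are guesses at the CME URL scheme, not values taken from the source):

# Hypothetical usage; 'precious' and 'gold' are assumed URL slugs.
soup = scrape('precious', 'gold')
print(soup.title)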
Example #2
Source File: githubpy.py From osint-scraper with MIT License | 7 votes |
def get_access_token(self, code, state=None):
    '''
    In callback url: http://host/callback?code=123&state=xyz
    use code and state to get an access token.
    '''
    kw = dict(client_id=self._client_id, client_secret=self._client_secret, code=code)
    if self._redirect_uri:
        kw['redirect_uri'] = self._redirect_uri
    if state:
        kw['state'] = state
    opener = build_opener(HTTPSHandler)
    request = Request('https://github.com/login/oauth/access_token', data=_encode_params(kw))
    request.get_method = _METHOD_MAP['POST']
    request.add_header('Accept', 'application/json')
    try:
        response = opener.open(request, timeout=TIMEOUT)
        r = _parse_json(response.read())
        if 'error' in r:
            raise ApiAuthError(str(r.error))
        return str(r.access_token)
    except HTTPError as e:
        raise ApiAuthError('HTTPError when get access token')
Example #3
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 6 votes |
def get_response(url, faker=False):
    logging.debug('get_response: %s' % url)
    # install cookies
    if cookies:
        opener = request.build_opener(request.HTTPCookieProcessor(cookies))
        request.install_opener(opener)
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response

# DEPRECATED in favor of get_content()
Example #4
Source File: exposition.py From client_python with Apache License 2.0 | 6 votes |
def default_handler(url, method, timeout, headers, data):
    """Default handler that implements HTTP/HTTPS connections.

    Used by the push_to_gateway functions. Can be re-used by other handlers."""

    def handle():
        request = Request(url, data=data)
        request.get_method = lambda: method
        for k, v in headers:
            request.add_header(k, v)
        resp = build_opener(HTTPHandler).open(request, timeout=timeout)
        if resp.code >= 400:
            raise IOError("error talking to pushgateway: {0} {1}".format(
                resp.code, resp.msg))

    return handle
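The factory returns a zero-argument closure that performs the upload when invoked. A hedged usage sketch, assuming the prometheus_client package is installed; the gateway address and job name are placeholders:

# Sketch assuming prometheus_client; gateway address and job name are placeholders.
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

registry = CollectorRegistry()
g = Gauge('job_last_success_unixtime', 'Last successful run', registry=registry)
g.set_to_current_time()

# push_to_gateway() accepts a handler factory with the signature shown above.
push_to_gateway('localhost:9091', job='batch_demo', registry=registry,
                handler=default_handler)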
Example #5
Source File: httpclient.py From opsbro with MIT License | 6 votes |
def post(uri, params={}, headers={}):
    data = None  # always none in GET
    if params:
        # TODO: sure it's json and not urlencode?
        # data = urlencode(params)
        data = unicode_to_bytes(jsoner.dumps(params))

    url_opener = build_opener(HTTPHandler)

    req = Request(uri, data)
    req.get_method = lambda: 'POST'
    for (k, v) in headers.items():
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    # code = request.code
    return response
Example #6
Source File: httpclient.py From opsbro with MIT License | 6 votes |
def put(uri, data=None, params={}, headers=None):
    # data = None # always none in GET
    if headers is None:
        headers = {}
    if params:
        # TODO: sure it's json and not urlencode?
        # data = urlencode(params)
        uri = "%s?%s" % (uri, urlencode(params))
        headers['Content-Type'] = 'your/contenttype'

    url_opener = build_opener(HTTPHandler)

    req = Request(uri, data)
    req.get_method = lambda: 'PUT'
    for (k, v) in headers.items():
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    # code = request.code
    return response
Example #7
Source File: website_helper.py From aws-media-insights-engine with Apache License 2.0 | 6 votes |
def send_response(event, context, response_status, response_data):
    """
    Send a resource manipulation status response to CloudFormation
    """
    response_body = json.dumps({
        "Status": response_status,
        "Reason": "See the details in CloudWatch Log Stream: " + context.log_stream_name,
        "PhysicalResourceId": context.log_stream_name,
        "StackId": event['StackId'],
        "RequestId": event['RequestId'],
        "LogicalResourceId": event['LogicalResourceId'],
        "Data": response_data
    })

    LOGGER.info('ResponseURL: {s}'.format(s=event['ResponseURL']))
    LOGGER.info('ResponseBody: {s}'.format(s=response_body))

    opener = build_opener(HTTPHandler)
    request = Request(event['ResponseURL'], data=response_body.encode('utf-8'))
    request.add_header('Content-Type', '')
    request.add_header('Content-Length', len(response_body))
    request.get_method = lambda: 'PUT'
    response = opener.open(request)
    # fix: call getcode(); the original logged the bound method itself
    LOGGER.info("Status code: {s}".format(s=response.getcode()))
    LOGGER.info("Status message: {s}".format(s=response.msg))
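Patching request.get_method with a lambda, as above, is the classic way to force a verb other than GET or POST. On Python 3.3+ the Request constructor accepts a method argument directly, so an equivalent sketch is:

# Equivalent on Python 3.3+; the URL and body are placeholders.
from urllib.request import HTTPHandler, Request, build_opener

request = Request('https://example.com/resource', data=b'{}', method='PUT')
response = build_opener(HTTPHandler).open(request)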
Example #8
Source File: httpclient.py From opsbro with MIT License | 6 votes |
def delete(uri, params={}, headers={}):
    data = None  # always none in GET
    if params:
        uri = "%s?%s" % (uri, urlencode(params))

    url_opener = build_opener(HTTPHandler)

    req = Request(uri, data)
    req.get_method = lambda: 'DELETE'
    for (k, v) in headers.items():
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    # code = request.code
    return response
Example #9
Source File: py3.py From php-load-test with Do What The F*ck You Want To Public License | 6 votes |
def check_php_multipartform_dos(url, post_body, headers, ip):
    try:
        proxy_handler = urllib2.ProxyHandler({"http": ip})
        null_proxy_handler = urllib2.ProxyHandler({})
        opener = urllib2.build_opener(proxy_handler)
        urllib2.install_opener(opener)
        req = urllib2.Request(url)
        for key in headers.keys():
            req.add_header(key, headers[key])
        starttime = datetime.datetime.now()
        fd = urllib2.urlopen(req, post_body)
        html = fd.read()
        endtime = datetime.datetime.now()
        usetime = (endtime - starttime).seconds
        result = None  # fix: ensure result is bound even for fast responses
        if usetime > 5:
            result = url + " is vulnerable"
        elif usetime > 3:
            result = "need to check normal respond time"
        return [result, usetime]
    except KeyboardInterrupt:
        exit()

# end
Example #10
Source File: app.py From aws-media-insights-engine with Apache License 2.0 | 6 votes |
def send_response(event, context, response_status, response_data):
    '''Send a resource manipulation status response to CloudFormation'''
    response_body = json.dumps({
        "Status": response_status,
        "Reason": "See the details in CloudWatch Log Stream: " + context.log_stream_name,
        "PhysicalResourceId": context.log_stream_name,
        "StackId": event['StackId'],
        "RequestId": event['RequestId'],
        "LogicalResourceId": event['LogicalResourceId'],
        "Data": response_data
    })
    logger.info('ResponseURL: %s', event['ResponseURL'])
    logger.info('ResponseBody: %s', response_body)
    opener = build_opener(HTTPHandler)
    request = Request(event['ResponseURL'], data=response_body.encode('utf-8'))
    request.add_header('Content-Type', '')
    request.add_header('Content-Length', len(response_body))
    request.get_method = lambda: 'PUT'
    response = opener.open(request)
    logger.info("Status code: %s", response.getcode())
    logger.info("Status message: %s", response.msg)
Example #11
Source File: scholar.py From dblp with MIT License | 6 votes |
def __init__(self):
    self.articles = []
    self.query = None
    self.cjar = MozillaCookieJar()

    # If we have a cookie file, load it:
    if ScholarConf.COOKIE_JAR_FILE and \
       os.path.exists(ScholarConf.COOKIE_JAR_FILE):
        try:
            self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
            ScholarUtils.log('info', 'loaded cookies file')
        except Exception as msg:
            ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
            self.cjar = MozillaCookieJar()  # Just to be safe

    self.opener = build_opener(HTTPCookieProcessor(self.cjar))
    self.settings = None  # Last settings object, if any
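The same load-or-start-fresh idea, condensed into a standalone sketch (the cookie file name is a placeholder):

# Sketch: persist cookies across runs with MozillaCookieJar.
from http.cookiejar import MozillaCookieJar
from urllib.request import HTTPCookieProcessor, build_opener

cjar = MozillaCookieJar('cookies.txt')
try:
    cjar.load(ignore_discard=True)   # reuse cookies from a previous run
except OSError:
    pass                             # no cookie file yet; start fresh

opener = build_opener(HTTPCookieProcessor(cjar))
opener.open('https://example.com')
cjar.save(ignore_discard=True)       # write cookies back for the next run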
Example #12
Source File: ghost.py From GhostPotato with MIT License | 6 votes |
def do_socks(self, line):
    headers = ["Protocol", "Target", "Username", "AdminStatus", "Port"]
    url = "http://localhost:9090/ntlmrelayx/api/v1.0/relays"
    try:
        proxy_handler = ProxyHandler({})
        opener = build_opener(proxy_handler)
        response = Request(url)
        r = opener.open(response)
        result = r.read()
        items = json.loads(result)
    except Exception as e:
        logging.error("ERROR: %s" % str(e))
    else:
        if len(items) > 0:
            self.printTable(items, header=headers)
        else:
            logging.info('No Relays Available!')
Example #13
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 6 votes |
def get_response(url, faker=False):
    logging.debug('get_response: %s' % url)
    # install cookies
    if cookies:
        opener = request.build_opener(request.HTTPCookieProcessor(cookies))
        request.install_opener(opener)
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response

# DEPRECATED in favor of get_content()
Example #14
Source File: unixclient.py From opsbro with MIT License | 5 votes |
def get_local(u, local_socket, params={}, method='GET', timeout=10):
    UnixHTTPConnection.socket_timeout = timeout
    data = None
    special_headers = []
    if method == 'GET' and params:
        u = "%s?%s" % (u, urlencode(params))
    if method == 'POST' and params:
        data = string_encode(urlencode(params))
    if method == 'PUT' and params:
        special_headers.append(('Content-Type', 'your/contenttype'))
        data = string_encode(params)

    # not the same way to connect
    # * windows: TCP
    # * unix   : unix socket
    if os.name == 'nt':
        url_opener = build_opener(HTTPHandler)
        uri = 'http://127.0.0.1:6770%s' % u
    else:  # unix
        url_opener = build_opener(UnixSocketHandler())
        uri = 'unix:/%s%s' % (local_socket, u)

    logger.debug("Connecting to local http/unix socket at: %s with method %s" % (uri, method))

    req = Request(uri, data)
    req.get_method = lambda: method
    for (k, v) in special_headers:
        req.add_header(k, v)
    request = url_opener.open(req)
    response = request.read()
    code = request.code
    return (code, response)
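Note that build_opener() accepts handler classes as well as handler instances; classes are instantiated internally, which is why HTTPHandler and UnixSocketHandler() can be passed interchangeably above:

# build_opener() takes BaseHandler subclasses or instances.
from urllib.request import HTTPHandler, build_opener

opener_a = build_opener(HTTPHandler)     # class: instantiated by build_opener
opener_b = build_opener(HTTPHandler())   # instance: used as-is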
Example #15
Source File: payloadtools.py From wfuzz with GNU General Public License v2.0 | 5 votes |
def _do_search(self, offset=0, limit=50):
    # some code taken from http://www.securitybydefault.com/2014/07/search2auditpy-deja-que-bing-haga-el.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+SecurityByDefault+%28Security+By+Default%29
    # api doc http://go.microsoft.com/fwlink/?LinkID=248077
    user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; FDM; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 1.1.4322)'
    creds = (':%s' % self._key).encode('base64')[:-1]
    auth = 'Basic %s' % creds

    result = None
    try:
        urlstr = 'https://api.datamarket.azure.com/Data.ashx/Bing/Search/Composite?Sources=%27web%27&Query=%27' + self._dork + '%27&$format=json'
        if limit != 50:
            urlstr += "&$top=%d" % limit
        if offset != 0:
            urlstr += "&$skip=%d" % offset

        request = Request(urlstr)
        request.add_header('Authorization', auth)
        request.add_header('User-Agent', user_agent)
        requestor = build_opener()
        result = requestor.open(request)
    except Exception as e:
        raise FuzzExceptResourceParseError("Error when retrieving Bing API results: %s." % str(e))

    results = json.loads(result.read())

    # WebTotal is not reliable, it is usually much bigger than the actual results, therefore
    # if your offset increases over the real number of results, you get a dict
    # without values and counters to ''. It gets updated when you are close to that limit though.
    if results['d']['results'][0]["WebTotal"]:
        res_total = int(results['d']['results'][0]["WebTotal"])
        res_list = results['d']['results'][0]['Web']
        return res_total, len(res_list), res_list
    else:
        return 0, 0, 0
Example #16
Source File: __init__.py From article-date-extractor with MIT License | 5 votes |
def extractArticlePublishedDate(articleLink, html=None):
    print("Extracting date from " + articleLink)

    articleDate = None

    try:
        articleDate = _extractFromURL(articleLink)

        if html is None:
            # note: 'urllib' here is presumably this project's compat alias
            # exposing Request/build_opener (e.g. urllib2 / urllib.request)
            request = urllib.Request(articleLink)
            # Using a browser user agent decreases the chance of sites blocking this request - just a suggestion
            # request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36')
            html = urllib.build_opener().open(request).read()

        parsedHTML = BeautifulSoup(html, "lxml")

        possibleDate = _extractFromLDJson(parsedHTML)
        if possibleDate is None:
            possibleDate = _extractFromMeta(parsedHTML)
        if possibleDate is None:
            possibleDate = _extractFromHTMLTag(parsedHTML)

        articleDate = possibleDate

    except Exception as e:
        print("Exception in extractArticlePublishedDate for " + articleLink)
        print(e.args)

    return articleDate
Example #17
Source File: ad_manager.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def __init__(self, ad_manager_client, version=sorted(_SERVICE_MAP.keys())[-1],
             server=None):
    """Initializes a DataDownloader.

    Args:
        ad_manager_client: The AdManagerClient whose attributes will be used to
            authorize your report download and PQL query requests.
        [optional]
        version: A string identifying the Ad Manager version to connect to.
            This defaults to what is currently the latest version. This will
            be updated in future releases to point to what is then the latest
            version.
        server: A string identifying the webserver hosting the Ad Manager API.
    """
    if not server:
        server = DEFAULT_ENDPOINT

    if server[-1] == '/':
        server = server[:-1]

    self._ad_manager_client = ad_manager_client
    self._version = version
    self._server = server
    self._report_service = None
    self._pql_service = None
    self.proxy_config = self._ad_manager_client.proxy_config
    handlers = self.proxy_config.GetHandlers()
    self.url_opener = build_opener(*handlers)

    if self._ad_manager_client.custom_http_headers:
        self.url_opener.addheaders.extend(
            self._ad_manager_client.custom_http_headers.items())
Example #18
Source File: util.py From chicago-justice with GNU General Public License v3.0 | 5 votes |
def load_html(url, with_cookies=False, headers={}):
    """Attempts to load an HTML page, returning a BeautifulSoup instance.

    Raises any networking or parsing exceptions"""
    # note: 'urlopen' here names the urllib request module, not the urlopen() function
    if with_cookies:
        cj = CookieJar()
        opener = urlopen.build_opener(urlopen.HTTPCookieProcessor(cj))
    else:
        opener = urlopen.build_opener()
    request = urlopen.Request(url, headers=headers)
    response = opener.open(request)
    html = response.read().decode('utf-8', errors='replace')
    soup = BeautifulSoup(html, 'html.parser')
    return soup
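A hypothetical call (the URL and headers are placeholders):

# Hypothetical usage; URL and headers are placeholders.
soup = load_html('https://example.com/news', with_cookies=True,
                 headers={'User-Agent': 'Mozilla/5.0'})
print(soup.title.get_text())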
Example #19
Source File: common.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def BuildOpener(self):
    """Builds an OpenerDirector instance using the ProxyConfig settings.

    This will return a urllib2.request.OpenerDirector instance.

    Returns:
        An OpenerDirector instance instantiated with settings defined in the
        ProxyConfig instance.
    """
    return build_opener(*self.GetHandlers())
Example #20
Source File: adwords.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def __init__(self, adwords_client, version=sorted(_SERVICE_MAP.keys())[-1],
             server=None):
    """Initializes a ReportDownloader.

    Args:
        adwords_client: The AdwordsClient whose attributes will be used to
            authorize your report download requests.
        [optional]
        version: A string identifying the AdWords version to connect to. This
            defaults to what is currently the latest version. This will be
            updated in future releases to point to what is then the latest
            version.
        server: A string identifying the webserver hosting the AdWords API.
    """
    if not server:
        server = _DEFAULT_ENDPOINT
    server = server.rstrip('/')

    self._adwords_client = adwords_client
    self._namespace = self._NAMESPACE_FORMAT % version
    self._end_point = self._END_POINT_FORMAT % (server, version)
    self._header_handler = _AdWordsHeaderHandler(
        adwords_client, version, self._adwords_client.enable_compression,
        self._adwords_client.custom_http_headers)
    self.proxy_config = self._adwords_client.proxy_config
    handlers = self.proxy_config.GetHandlers()
    self.url_opener = build_opener(*handlers)
    if self._adwords_client.custom_http_headers:
        self.url_opener.addheaders.extend(
            adwords_client.custom_http_headers.items())

    schema_url = self._SCHEMA_FORMAT % (server, version)
    service_class = googleads.common.GetSchemaHelperForLibrary()
    self.schema_helper = service_class(
        schema_url, self._adwords_client.timeout, self.proxy_config,
        self._namespace, self._adwords_client.cache)
Example #21
Source File: adwords.py From googleads-python-lib with Apache License 2.0 | 5 votes |
def __init__(self, request_builder, upload_url, current_content_length=0,
             is_last=False):
    """Initializes the IncrementalUpload.

    Args:
        request_builder: an AbstractUploadRequestBuilder instance.
        upload_url: a string url provided by the BatchJobService.
        current_content_length: an integer identifying the current content
            length of data uploaded to the Batch Job.
        is_last: a boolean indicating whether this is the final increment.

    Raises:
        GoogleAdsValueError: if the content length is lower than 0.
    """
    self._request_builder = request_builder
    if current_content_length < 0:
        raise googleads.errors.GoogleAdsValueError(
            'Current content length %s is < 0.' % current_content_length)
    self._current_content_length = current_content_length
    self._is_last = is_last
    self._url_opener = build_opener(
        *self._request_builder.client.proxy_config.GetHandlers())
    if self._request_builder.client.custom_http_headers:
        self._url_opener.addheaders.extend(
            self._request_builder.client.custom_http_headers.items())
    self._upload_url = self._InitializeURL(upload_url, current_content_length)
Example #22
Source File: url.py From beibq with BSD 3-Clause "New" or "Revised" License | 5 votes |
def post_url(url, data):
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
    }
    req = request.Request(url, headers=header)
    # fix: urlencode() returns str; POST data must be bytes on Python 3
    data = parse.urlencode(data).encode('utf-8')
    opener = request.build_opener(request.HTTPCookieProcessor())
    response = opener.open(req, data)
    return response.read()
Example #23
Source File: 刷网页.py From Python-Code with MIT License | 5 votes |
def brash(proxy_dict):
    # print(proxy_dict)
    global count
    global count1
    if count1 < 100:
        try:  # normal operation
            count = count + 1
            print(count, 'times')  # watch that the program is still running; print how many runs so far
            proxy_handler = request.ProxyHandler({'http': proxy_dict})
            opener = request.build_opener(proxy_handler)
            request.install_opener(opener)
            countUrl = len(url)
            for i in range(countUrl):  # iterate over all urls
                req = request.Request(url[i], headers=head, method='POST')
                try:
                    # lock.acquire()
                    response = request.urlopen(req)  # fetch the page
                    html = response.read().decode('utf-8')
                    print(html)
                    # lock.release()
                except urllib.error.URLError as e:
                    print(e.reason)
                    print("EEEEEE")
                    # time.sleep(1)  # pause between requests
        except Exception:  # something went wrong
            print('Retry')
            count1 = count1 + 1
            time.sleep(1)  # pause between requests
    else:
        print('much error')
Example #24
Source File: compat.py From johnnydep with MIT License | 5 votes |
def urlretrieve(url, filename, data=None, auth=None):
    if auth is not None:
        # https://docs.python.org/2.7/howto/urllib2.html#id6
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()

        # Add the username and password.
        # If we knew the realm, we could use it instead of None.
        username, password = auth
        top_level_url = urlparse(url).netloc
        password_mgr.add_password(None, top_level_url, username, password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)

        # create "opener" (OpenerDirector instance)
        opener = urllib2.build_opener(handler)
    else:
        opener = urllib2.build_opener()
    res = opener.open(url, data=data)
    headers = res.info()
    with open(filename, "wb") as fp:
        fp.write(res.read())
    return filename, headers
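A hypothetical call (the URL, file name, and credentials are placeholders):

# Hypothetical usage with HTTP basic auth.
filename, headers = urlretrieve('https://example.com/pkg.tar.gz',
                                'pkg.tar.gz', auth=('user', 'secret'))
print(headers.get('Content-Type'))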
Example #25
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 5 votes |
def set_http_proxy(proxy):
    if proxy == None:  # Use system default setting
        proxy_support = request.ProxyHandler()
    elif proxy == '':  # Don't use any proxy
        proxy_support = request.ProxyHandler({})
    else:  # Use proxy
        proxy_support = request.ProxyHandler(
            {'http': '%s' % proxy, 'https': '%s' % proxy})
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)
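The three branches map to ProxyHandler's three modes: no argument reads proxy settings from the environment, an empty dict disables proxies entirely, and an explicit dict routes the listed schemes through the given proxy. Hypothetical calls:

# Hypothetical calls; the proxy address is a placeholder.
set_http_proxy(None)              # use system/environment proxy settings
set_http_proxy('')                # go direct, bypassing any proxy
set_http_proxy('127.0.0.1:8080')  # send http/https traffic via this proxy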
Example #26
Source File: default.py From ru with GNU General Public License v2.0 | 5 votes |
def get_html(web_url):
    cookie_jar = cookielib.CookieJar()
    if mode == 'FAVS':
        cookie_jar = auth(cookie_jar)
    if antizapret_enabled:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar),
            az.AntizapretProxyHandler())
    else:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [("User-Agent", USER_AGENT)]
    connection = opener.open(web_url)
    html = connection.read()
    connection.close()
    return html.decode('utf-8')
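Assigning to opener.addheaders, as above, replaces the opener's default header list (which normally carries a Python-urllib User-Agent) rather than appending to it:

from urllib.request import build_opener

opener = build_opener()
print(opener.addheaders)   # default: [('User-agent', 'Python-urllib/3.x')]
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]  # replaced wholesale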
Example #27
Source File: default.py From ru with GNU General Public License v2.0 | 5 votes |
def get_html_with_referer(page_url, referer):
    cookie_jar = cookielib.CookieJar()
    if mode == 'FAVS':
        cookie_jar = auth(cookie_jar)
    if antizapret_enabled:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar),
            az.AntizapretProxyHandler())
    else:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie_jar))
    if referer is not None:
        opener.addheaders = [("Referer", referer)]
    connection = opener.open(page_url)
    html = connection.read()
    connection.close()
    return html.decode('utf-8')
Example #28
Source File: common.py From acmpv with Do What The F*ck You Want To Public License | 5 votes |
def unset_proxy():
    proxy_handler = request.ProxyHandler({})
    opener = request.build_opener(proxy_handler)
    request.install_opener(opener)

# DEPRECATED in favor of set_proxy() and unset_proxy()
Example #29
Source File: default.py From ru with GNU General Public License v2.0 | 5 votes |
def post_request(page_url, req_data=None, headers=None):
    if headers is None:
        headers = {}
    if antizapret_enabled:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(),
            az.AntizapretProxyHandler())
    else:
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor())
    opener.addheaders = [("User-Agent", USER_AGENT)]
    conn = urllib_request.Request(
        page_url, urllib_parse.urlencode(req_data).encode('utf-8'), headers)
    connection = opener.open(conn)
    html = connection.read()
    return html
Example #30
Source File: http.py From suds-py3 with GNU Lesser General Public License v3.0 | 5 votes |
def u2opener(self):
    """
    Create a urllib opener.
    @return: An opener.
    @rtype: I{OpenerDirector}
    """
    if self.urlopener is None:
        return u2.build_opener(*self.u2handlers())
    else:
        return self.urlopener