Python urllib._urlopener() Examples

The following are 3 code examples of urllib._urlopener(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urllib, or try the search function.

Example #1

Source File: gooenum.py From Yuki-Chan-The-Auto-Pentest with MIT License

5 votes

def scrape_google(dom):
    """
    Enumerate sub-domains and hosts of ``dom`` by scraping Google.

    Five result pages are requested (``start`` offsets 100 through 500,
    100 hits per page).  Every ``href`` in the returned HTML whose host
    ends in ``.<dom>`` is collected and reduced to its host name.

    :param dom: domain to search for, e.g. ``"example.com"``.  It is
        matched literally in the extracted links.
    :returns: the result of ``unique()`` applied to the host names found.
    """
    # NOTE(review): offset 0 is never requested, so the first page of hits
    # is skipped -- confirm whether that is intentional.
    searches = ["100", "200", "300", "400", "500"]
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    headers = {'User-Agent': user_agent}

    # Escape the domain so its dots are matched literally instead of acting
    # as regex wildcards (otherwise "example.com" also matches "exampleXcom").
    dom_pattern = re.escape(dom)

    urllib._urlopener = AppURLopener()

    pages = []
    for start in searches:
        url = "http://google.com/search?hl=en&lr=&ie=UTF-8&q=%2B" + dom + "&start=" + start + "&sa=N&filter=0&num=100"
        try:
            # Python 2 API; on Python 3 the attribute lookup itself fails.
            sock = urllib.urlopen(url)
        except AttributeError:
            request = urllib.request.Request(url, None, headers)
            response = urllib.request.urlopen(request)
            try:
                # Decode the payload; str(bytes) would yield the "b'...'"
                # repr with escaped content rather than the page text.
                pages.append(response.read().decode("utf-8", "replace"))
            finally:
                response.close()
        else:
            try:
                pages.append(sock.read())
            finally:
                sock.close()
    data = "".join(pages)

    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown string escapes through unchanged.
    link_re = r'href="htt\w{1,2}://([^:?]*[a-b0-9]*[^:?]*\.' + dom_pattern + r')/'
    host_re = r"^([a-z.0-9^]*" + dom_pattern + r")"

    filtered = []
    # Make sure we are only getting the host
    for candidate in unique(re.findall(link_re, data)):
        filtered.extend(re.findall(host_re, candidate))
    time.sleep(2)
    return unique(filtered)

Example #2

Source File: wsclient.py From seq2seq with Apache License 2.0

5 votes

def wsopen(self, url, post, **params):
    """Send a web-service request and return the response object.

    The keyword arguments become the request parameters.  Passing
    ``noparam=True`` discards all parameters; otherwise ``self.user`` is
    added as ``user`` when set, and when ``self.password`` is set an HMAC
    keyed by the password is computed over the sorted ``key=value``
    pairs and sent as ``hmac``.

    url    -- endpoint to call.
    post   -- truthy to send the encoded parameters as the request body,
              falsy to append them to ``url`` as a query string.

    Returns the object produced by ``urllib.urlopen``.
    Raises ``WSError`` when the request fails with ``IOError`` or when
    the response status is not 200.
    """
    if params.pop('noparam', False):
        params = {}
    else:
        if self.user is not None:
            params['user'] = self.user
        if self.password is not None:
            # Any caller-supplied signature is dropped before signing.
            params.pop('hmac', None)
            signer = hmac.new(self.password)
            for name, value in sorted(params.items()):
                signer.update("%s=%s" % (name, value))
            params['hmac'] = signer.hexdigest()

    query = urllib.urlencode(params)
    if post:
        body = query
    elif query:
        url = "{}?{}".format(url, query)

    if self.debug:
        if post:
            print("POST:\n{}\n{!r}\n".format(url, body), file=sys.stderr)
        else:
            print("GET:\n{}\n".format(url), file=sys.stderr)

    class URLopener(urllib.FancyURLopener):
        # Hand HTTP error responses back as ordinary response objects so
        # the status code can be inspected by the caller below.
        def http_error_default(self, url, fp, errcode, errmsg, headers):
            return urllib.addinfourl(fp, headers, "http:" + url, errcode)

    # ``body`` only exists for POST requests, so build the argument list
    # up front and make a single call.
    call_args = (url, body) if post else (url,)
    try:
        # Installed module-wide: urllib.urlopen() uses this opener.
        urllib._urlopener = URLopener()
        resp = urllib.urlopen(*call_args)
    except IOError as e:
        raise WSError(url, msg=e)

    status = resp.getcode()
    if self.debug:
        print("RESPONSE:\n{}\n{}".format(status, resp.info()), file=sys.stderr)
    if status != 200:
        raise WSError(url, status, resp.read())
    return resp

Example #3

Source File: urllib.py From opentracing-python-instrumentation with MIT License

4 votes

def install_patches():
    """Install a tracing opener as the default opener of the old ``urllib``.

    On Python 3 the work is delegated to the sibling ``urllib2`` patcher.
    Otherwise ``urllib._urlopener`` is replaced with an opener that wraps
    every ``open()`` call in a child span.
    """
    if six.PY3:
        # The old urllib does not exist in Py3, so delegate to urllib2 patcher
        from . import urllib2
        urllib2.install_patches()
        return

    import urllib
    import urlparse

    log.info('Instrumenting urllib methods for tracing')

    class TracedURLOpener(urllib.FancyURLopener):
        """Opener that records each ``open()`` call as a child span."""

        def open(self, fullurl, data=None):
            """Open ``fullurl`` inside a span tagged with URL, peer and status."""
            target = urlparse.urlparse(fullurl)
            peer_host = target.hostname or None
            peer_port = target.port or None

            span = utils.start_child_span(
                operation_name='urllib',
                parent=current_span_func(),
            )
            span.set_tag(ext_tags.SPAN_KIND, ext_tags.SPAN_KIND_RPC_CLIENT)

            # use span as context manager so that its finish() method is called
            with span:
                span.set_tag(ext_tags.HTTP_URL, fullurl)
                if peer_host:
                    span.set_tag(ext_tags.PEER_HOST_IPV4, peer_host)
                if peer_port:
                    span.set_tag(ext_tags.PEER_PORT, peer_port)
                # TODO add callee service name
                # TODO add headers to propagate trace
                # cannot use super here, this is an old style class
                response = urllib.FancyURLopener.open(self, fullurl, data)
                status = response.getcode()
                if status is not None:
                    span.set_tag(ext_tags.HTTP_STATUS_CODE, status)

            return response

        def retrieve(self, url, filename=None, reporthook=None, data=None):
            """Not supported by the traced opener."""
            raise NotImplementedError

    urllib._urlopener = TracedURLOpener()