Python urllib.error.URLError() Examples

The following are 30 code examples of urllib.error.URLError(), drawn from open-source projects. The project and source file each example comes from are listed above it. You may also want to check out all of the other available functions and classes of the urllib.error module.
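Before the project examples, here is a minimal, self-contained sketch of the usual handling pattern (the host name is just a placeholder): HTTPError is a subclass of URLError, so it is caught first when the HTTP status code matters, while a plain URLError (DNS failure, refused connection, timeout) carries the underlying cause in its reason attribute.

from urllib.request import urlopen
from urllib.error import HTTPError, URLError

try:
    # example.invalid is a placeholder; any unreachable host triggers URLError
    with urlopen("https://example.invalid/", timeout=10) as response:
        body = response.read()
except HTTPError as e:
    # The server answered, but with an error status (4xx/5xx)
    print("HTTP error:", e.code, e.reason)
except URLError as e:
    # The server could not be reached at all
    print("Failed to reach the server:", e.reason)

Most of the examples below are variations on this pattern: retrying after a pause, falling back to local or cached data, or wrapping the error in a project-specific exception.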
Example #1
Source File: download.py    From gog-galaxy-plugin-downloader with GNU General Public License v3.0
def get_plugin_config(config_uri):
    """
    Downloads/opens configuration yaml file, returns
    dict of Galaxy plugins
    """
    # Try to open the URI as a URL or fall back to opening local file
    try:
        config_uri_parsed = urlparse(config_uri)
        if config_uri_parsed.scheme in ['https', 'http']:
            url = urlopen(config_uri)
            yaml_data = url.read()
        else:
            with open(config_uri, 'r') as file_data:
                yaml_data = file_data.read()
    except URLError as e:
        print(e)

    # Parse the YAML configuration
    try:
        plugin_data = yaml.safe_load(yaml_data)

        return plugin_data['plugins']
    except yaml.YAMLError as e:
        print(e) 
Example #2
Source File: pubchem.py    From QCElemental with BSD 3-Clause "New" or "Revised" License
def get_sdf(self):
        """Function to return the SDF (structure-data file) of the PubChem object."""
        from urllib.request import urlopen, Request
        from urllib.parse import quote
        from urllib.error import URLError

        if len(self.dataSDF) == 0:
            url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/SDF?record_type=3d".format(
                quote(str(self.cid))
            )
            req = Request(url, headers={"Accept": "chemical/x-mdl-sdfile"})
            try:
                self.dataSDF = urlopen(req).read().decode("utf-8")
            except URLError as e:
                msg = "Unable to open\n\n%s\n\ndue to the error\n\n%s\n\n" % (url, e)
                msg += "It is possible that 3D information does not exist for this molecule in the PubChem database\n"
                print(msg)
                raise ValidationError(msg)
        return self.dataSDF 
Example #3
Source File: download_tlds.py    From msticpy with MIT License
def _get_tlds() -> Optional[List[str]]:
    """
    Return IANA Top Level Domains.

    Returns
    -------
    Set[str]
        Set of top level domains.

    """
    try:
        req = request.Request(_TLD_LIST)
        with request.urlopen(req) as resp:  # nosec - Hard-coded URL
            txt_resp = resp.read().decode("utf-8", "ignore")
            tld_set = set(txt_resp.split("\n")[1:])  # get rid of header
            tld_set.remove("")  # get rid of blank values
            return sorted(tld_set)
    except (HTTPError, URLError) as err:
        warnings.warn(
            "Exception detected trying to retrieve IANA top-level domain list."
            + "Falling back to builtin seed list. "
            + f"{err.args}",
            RuntimeWarning,
        )
    return None 
Example #4
Source File: utils.py    From script.module.inputstreamhelper with MIT License
def _http_request(url, headers=None, time_out=10):
    """Perform an HTTP request and return request"""
    log(0, 'Request URL: {url}', url=url)

    try:
        if headers:
            request = Request(url, headers=headers)
        else:
            request = Request(url)
        req = urlopen(request, timeout=time_out)
        log(0, 'Response code: {code}', code=req.getcode())
        if 400 <= req.getcode() < 600:
            raise HTTPError('HTTP %s Error for url: %s' % (req.getcode(), url), response=req)
    except (HTTPError, URLError) as err:
        log(2, 'Download failed with error {}'.format(err))
        if yesno_dialog(localize(30004), '{line1}\n{line2}'.format(line1=localize(30063), line2=localize(30065))):  # Internet down, try again?
            return _http_request(url, headers, time_out)
        return None

    return req 
Example #5
Source File: tests.py    From kicad-schlib with Creative Commons Zero v1.0 Universal
def check_ds_link(url):
    if not url in datasheet_links:
        request = Request(url)
        request.get_method = lambda : 'HEAD'
        request.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0")
        try:
            response = urlopen(request, timeout=5)
            datasheet_links[url] = response.getcode()
        except URLError as e:
            datasheet_links[url] = getattr(e, 'code', str(e))
        except Exception as e:
            datasheet_links[url] = str(e)
        # Some 'special' hosts dont implement the HEAD method
        if datasheet_links[url] == 405:
            try:
                request.get_method = lambda : 'GET'
                response = urlopen(request, timeout=3)
                datasheet_links[url] = response.getcode()
            except URLError as e:
                datasheet_links[url] = getattr(e, 'code', str(e))
            except Exception as e:
                datasheet_links[url] = str(e)
    return datasheet_links[url] 
Example #6
Source File: fixtures.py    From pytest-flask with MIT License
def start(self):
        """Start application in a separate process."""
        def worker(app, host, port):
            app.run(host=host, port=port, use_reloader=False, threaded=True)
        self._process = multiprocessing.Process(
            target=worker,
            args=(self.app, self.host, self.port)
        )
        self._process.start()

        # We must wait for the server to start listening with a maximum
        # timeout of 5 seconds.
        timeout = 5
        while timeout > 0:
            time.sleep(1)
            try:
                urlopen(self.url())
                timeout = 0
            except URLError:
                timeout -= 1 
Example #7
Source File: github.py    From fusesoc with BSD 2-Clause "Simplified" License
def _checkout(self, local_dir):
        user = self.config.get("user")
        repo = self.config.get("repo")

        version = self.config.get("version", "master")

        # TODO : Sanitize URL
        url = URL.format(user=user, repo=repo, version=version)
        logger.info("Downloading {}/{} from github".format(user, repo))
        try:
            (filename, headers) = urllib.urlretrieve(url)
        except URLError as e:
            raise RuntimeError("Failed to download '{}'. '{}'".format(url, e.reason))
        t = tarfile.open(filename)
        (cache_root, core) = os.path.split(local_dir)

        # Ugly hack to get the first part of the directory name of the extracted files
        tmp = t.getnames()[0]
        t.extractall(cache_root)
        os.rename(os.path.join(cache_root, tmp), os.path.join(cache_root, core)) 
Example #8
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0
def validate_(self, value, context=None):
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if self.verify_exists:
            url_string = urlquote(urlunsplit((
                url['scheme'],
                (url['host6'] or url['host4'] or url['hostn_enc']) + ':' + (url['port'] or ''),
                url['path'],
                url['query'],
                url['frag'])
                ).encode('utf-8'), safe=VALID_CHAR_STRING)
            try:
                urlopen(url_string)
            except URLError:
                raise StopValidationError(self.messages['not_found']) 
Example #9
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0
def validate_(self, value, context=None):
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if self.verify_exists:
            url_string = urlquote(urlunsplit((
                url['scheme'],
                (url['host6'] or url['host4'] or url['hostn_enc']) + ':' + (url['port'] or ''),
                url['path'],
                url['query'],
                url['frag'])
                ).encode('utf-8'), safe=VALID_CHAR_STRING)
            try:
                urlopen(url_string)
            except URLError:
                raise StopValidationError(self.messages['not_found']) 
Example #10
Source File: cloudfrunt.py    From cloudfrunt with MIT License
def find_cf_issues(domains):

    error_domains = []

    for domain in domains:
        try:
            response = urlopen('http://' + domain)
        except HTTPError as e:
            if e.code == 403 and 'Bad request' in e.fp.read():
                try:
                    response = urlopen('https://' + domain)
                except URLError as e:
                    if 'handshake' in str(e).lower() or e.code == 403 and 'Bad request' in e.fp.read():
                        error_domains.append(domain)
                except:
                    pass
        except:
            pass

    return error_domains

# add a domain to CloudFront 
Example #11
Source File: _utils.py    From torf with GNU General Public License v3.0
def download_http(url, timeout=60):
    try:
        response = urllib.request.urlopen(URL(url), timeout=timeout).read()
    except urllib.error.URLError as e:
        try:
            msg = e.args[0].strerror
        except (AttributeError, IndexError):
            msg = (getattr(e, 'msg', None) or
                   getattr(e, 'strerror', None) or
                   'Failed')
        raise error.ConnectionError(url, msg)
    except socket.timeout:
        raise error.ConnectionError(url, 'Timed out')
    except http.client.HTTPException:
        raise error.ConnectionError(url, 'No HTTP response')
    except (OSError, IOError):
        raise error.ConnectionError(url, 'Unknown error')
    else:
        return response 
Example #12
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getCountry(ipAddress):
    '''
    Determine the country where an IP address is located.
    '''
    try:
        response = urlopen("http://freegeoip.net/json/" +
                           ipAddress).read().decode('utf-8')
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        response = urlopen("http://freegeoip.net/json/" +
                           ipAddress).read().decode('utf-8')
    except:
        return 'Unknown'
    responseJson = json.loads(response)
    return responseJson.get("country_code")  # return the country code
Example #13
Source File: cloudfrunt.py    From cloudfrunt with MIT License
def get_cf_ranges(cf_url):

    response = None
    ranges = []

    while response is None:
        try:
            response = urlopen(cf_url)
        except URLError as e:
            print(' [?] Got URLError trying to get CloudFront IP ranges. Retrying...')
        except:
            print(' [?] Got an unexpected error trying to get CloudFront IP ranges. Exiting...')
            raise

    cf_data = json.load(response)
    for item in cf_data['prefixes']:
        service = item.get('service')
        if service == 'CLOUDFRONT':
            ranges.append(item.get('ip_prefix'))

    return ranges

# find more domains and correct for CloudFront 
Example #14
Source File: Coordinate.py    From crawler_examples with Apache License 2.0
def getLinks(articleUrl):
    '''
    Parse the page source and return its internal links.
    '''
    try:
        html = urlopen("http://en.wikipedia.org" + articleUrl)
    except HTTPError:
        return None
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen("http://en.wikipedia.org" + articleUrl)
    bsObj = BeautifulSoup(html, "lxml")
    return bsObj.find("div", {"id": "bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))


# Set up the buffer queue
Example #15
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getHistoryIPs(pageUrl):
    pageUrl = pageUrl.replace("/wiki/", "")
    historyUrl = "http://en.wikipedia.org/w/index.php?title="+pageUrl+"&action=history"
    print("history url:", historyUrl)

    time.sleep(SLEEP_TIME)

    try:
        html = urlopen(historyUrl)
    except HTTPError:
        return None
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen(historyUrl)
    bsObj = BeautifulSoup(html, "lxml")
    ipAddresses = bsObj.findAll("a", {"class":"mw-anonuserlink"})

    addressList = set()
    for ipAddress in ipAddresses:
        print(pageUrl+": "+ipAddress.get_text())
        addressList.add(ipAddress.get_text())
    return addressList  # return the list of IP addresses

# Get the country code for every IP
Example #16
Source File: RunSpider.py    From crawler_examples with Apache License 2.0
def getLinks(articleUrl):
    try:
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    except HTTPError:
        ServerLog.writeLog("HTTPError")
        return None
    except URLError:
        ServerLog.writeLog("URLError")
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    bsObj = BeautifulSoup(html, "lxml")
    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))


# Crawl IP addresses
Example #17
Source File: integration_tools.py    From sawtooth-core with Apache License 2.0
def wait_until_status(url, status_code=200):
    """Pause the program until the given url returns the required status.

    Args:
        url (str): The url to query.
        status_code (int, optional): The required status code. Defaults to 200.
    """
    sleep_time = 1
    while True:
        try:
            response = urlopen(url)
            if response.getcode() == status_code:
                return

        except HTTPError as err:
            if err.code == status_code:
                return

            LOGGER.debug('failed to read url: %s', str(err))
        except URLError as err:
            LOGGER.debug('failed to read url: %s', str(err))

        LOGGER.debug('Retrying in %s secs', sleep_time)
        time.sleep(sleep_time) 
Example #18
Source File: _datasource.py    From auto-alt-text-lambda-api with MIT License
def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        if not os.path.exists(os.path.dirname(upath)):
            os.makedirs(os.path.dirname(upath))

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                f = _open(upath, 'wb')
                try:
                    shutil.copyfileobj(openedurl, f)
                finally:
                    f.close()
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath 
Example #19
Source File: _datasource.py    From vnpy_crypto with MIT License
def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        if not os.path.exists(os.path.dirname(upath)):
            os.makedirs(os.path.dirname(upath))

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                f = _open(upath, 'wb')
                try:
                    shutil.copyfileobj(openedurl, f)
                finally:
                    f.close()
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath 
Example #20
Source File: _datasource.py    From lambda-packs with MIT License
def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        if not os.path.exists(os.path.dirname(upath)):
            os.makedirs(os.path.dirname(upath))

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                f = _open(upath, 'wb')
                try:
                    shutil.copyfileobj(openedurl, f)
                finally:
                    f.close()
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath 
Example #21
Source File: spider.py    From crawler_examples with Apache License 2.0
def getBsObj(url):
    """
    Fetch the URL and return a BeautifulSoup object.
    """
    try:
        html = urlopen(url)
    except URLError:
        print("Sleeping!")
        time.sleep(SLEEP_TIME)
        html = urlopen(url)
    except:
        return None

    bsObj = BeautifulSoup(html, 'lxml')
    return bsObj 
Example #22
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getHistoryIPs(pageUrl):
    '''
    Extract contributors' IP addresses from the page.
    '''
    pageUrl = pageUrl.replace("/wiki/", "")
    historyUrl = "http://en.wikipedia.org/w/index.php?title=" + \
        pageUrl + "&action=history"
    print("history url:", historyUrl)

    time.sleep(SLEEP_TIME)

    try:
        html = urlopen(historyUrl)
    except HTTPError:
        return None
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen(historyUrl)
    bsObj = BeautifulSoup(html, "lxml")
    ipAddresses = bsObj.findAll("a", {"class": "mw-anonuserlink"})

    addressList = set()
    for ipAddress in ipAddresses:
        print(pageUrl + ": " + ipAddress.get_text())
        addressList.add(ipAddress.get_text())
    return addressList  # return the list of IP addresses
Example #23
Source File: domain_utils.py    From msticpy with MIT License
def _get_ssl_abuselist(cls) -> pd.DataFrame:
        """Download and load abuse.ch SSL Abuse List."""
        try:
            ssl_ab_list = pd.read_csv(
                "https://sslbl.abuse.ch/blacklist/sslblacklist.csv", skiprows=8
            )
        except (ConnectionError, HTTPError, URLError):
            ssl_ab_list = pd.DataFrame({"SHA1": []})

        return ssl_ab_list 
Example #24
Source File: RunServer.py    From crawler_examples with Apache License 2.0
def getLinks(articleUrl):
    try:
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    except HTTPError:
        ServerLog.writeLog("HTTPError")
        return None
    except URLError:
        ServerLog.writeLog("URLError")
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    bsObj = BeautifulSoup(html, "lxml")
    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))

# Set up the pages
Example #25
Source File: afl_stats.py    From afl-utils with Apache License 2.0
def twitter_init(config):
    try:
        config['twitter_creds_file'] = os.path.abspath(os.path.expanduser(config['twitter_creds_file']))
        if not os.path.exists(config['twitter_creds_file']):
            twitter.oauth_dance("fuzzer_stats", config['twitter_consumer_key'],
                                config['twitter_consumer_secret'], config['twitter_creds_file'])
        oauth_token, oauth_secret = twitter.read_token_file(config['twitter_creds_file'])
        twitter_instance = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_secret,
                                                              config['twitter_consumer_key'],
                                                              config['twitter_consumer_secret']))
        return twitter_instance
    except (twitter.TwitterHTTPError, URLError):
        print_err("Network error, twitter login failed! Check your connection!")
        sys.exit(1) 
Example #26
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getCountry(ipAddress):
    try:
        response = urlopen("http://freegeoip.net/json/"+ipAddress).read().decode('utf-8')
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        response = urlopen("http://freegeoip.net/json/"+ipAddress).read().decode('utf-8')
    except:
        return 'Unknown'
    responseJson = json.loads(response)
    return responseJson.get("country_code") # return the country code

# Extract contributors' IP addresses from the page
Example #27
Source File: afl_stats.py    From afl-utils with Apache License 2.0
def fetch_stats(config_settings, twitter_inst):
    stat_dict = dict()
    for fuzzer in config_settings['fuzz_dirs']:
        stats = load_stats(fuzzer)

        if not stats:
            continue

        sum_stats = summarize_stats(stats)

        try:
            with open('.afl_stats.{}'.format(os.path.basename(fuzzer)), 'r') as f:
                old_stats = json.load(f)
        except FileNotFoundError:
            old_stats = sum_stats.copy()

        # initialize/update stat_dict
        stat_dict[fuzzer] = (sum_stats, old_stats)

        stat_change = diff_stats(sum_stats, old_stats)

        with open('.afl_stats.{}'.format(os.path.basename(fuzzer)), 'w') as f:
            json.dump(sum_stats, f)

        print(prettify_stat(sum_stats, stat_change, True))

        tweet = prettify_stat(sum_stats, stat_change, False)

        l = len(tweet)
        c = clr.LRD if l > 140 else clr.LGN

        if twitter_inst:
            print_ok("Tweeting status (%s%d" % (c, l) + clr.RST + " chars)...")
            try:
                twitter_inst.statuses.update(status=shorten_tweet(tweet))
            except (twitter.TwitterHTTPError, URLError):
                print_warn("Problem connecting to Twitter! Tweet not sent!")
            except Exception as e:
                print_err("Sending tweet failed (Reason: " + clr.GRA + "%s" % e.__cause__ + clr.RST + ")") 
Example #28
Source File: bigsuds.py    From bigsuds with MIT License
def _create_client(self, wsdl_name):
        try:
            client = get_client(self._hostname, wsdl_name, self._username,
                                self._password, self._cachedir, self._verify,
                                self._timeout,self._port)
        except SAXParseException as e:
            raise ParseError('%s\nFailed to parse wsdl. Is "%s" a valid '
                    'namespace?' % (e, wsdl_name))
        # One situation that raises TransportError is when credentials are bad.
        except (URLError, TransportError) as e:
            raise ConnectionError(str(e))
        return self._create_client_wrapper(client, wsdl_name) 
Example #29
Source File: methods.py    From cryptolens-python with MIT License
def get_key(token, rsa_pub_key, product_id, key, fields_to_return = 0,\
                 metadata = False, floating_time_interval = 0):
        
        """
        Calls the GetKey method in Web API 3 and returns a tuple containing
        (LicenseKey, Message). If an error occurs, LicenseKey will be None. If
        everything went well, no message will be returned.
        
        More docs: https://app.cryptolens.io/docs/api/v3/GetKey
        """
        
        response = Response("","",0,"")
        
        try:
            response = Response.from_string(HelperMethods.send_request("key/getkey", {"token":token,\
                                                  "ProductId":product_id,\
                                                  "key":key,\
                                                  "FieldsToReturn":fields_to_return,\
                                                  "metadata":metadata,\
                                                  "FloatingTimeInterval": floating_time_interval,\
                                                  "Sign":"True",\
                                                  "SignMethod":1}))
        except HTTPError as e:
            response = Response.from_string(e.read())
        except URLError as e:
            return (None, "Could not contact the server. Error message: " + str(e))
        except Exception:
            return (None, "Could not contact the server.")
        
        pubkey = RSAPublicKey.from_string(rsa_pub_key)
    
        if response.result == 1:
            return (None, response.message)
        else:
            try:
                if HelperMethods.verify_signature(response, pubkey):
                    return (LicenseKey.from_response(response), response.message)
                else:
                    return (None, "The signature check failed.")
            except Exception:
                return (None, "The signature check failed.") 
Example #30
Source File: test__datasource.py    From lambda-packs with MIT License
def test_InvalidHTTPCacheURLError(self):
        self.assertRaises(URLError, self.ds._cache, invalid_httpurl())