Python urllib.error.URLError() Examples

The following are 30 code examples of urllib.error.URLError(), drawn from open-source projects. The project and source file each example comes from are listed above it. You may also want to check out all of the other available functions and classes of the urllib.error module.
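Before the project examples, here is a minimal, self-contained sketch of the usual handling pattern (the host name is just a placeholder): HTTPError is a subclass of URLError, so it is caught first when the HTTP status code matters, while a plain URLError (DNS failure, refused connection, timeout) carries the underlying cause in its reason attribute.

from urllib.request import urlopen
from urllib.error import HTTPError, URLError

try:
    # example.invalid is a placeholder; any unreachable host triggers URLError
    with urlopen("https://example.invalid/", timeout=10) as response:
        body = response.read()
except HTTPError as e:
    # The server answered, but with an error status (4xx/5xx)
    print("HTTP error:", e.code, e.reason)
except URLError as e:
    # The server could not be reached at all
    print("Failed to reach the server:", e.reason)

Most of the examples below are variations on this pattern: retrying after a pause, falling back to local or cached data, or wrapping the error in a project-specific exception.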
Example #1
Source File: download.py    From gog-galaxy-plugin-downloader with GNU General Public License v3.0
def get_plugin_config(config_uri):
    """
    Downloads/opens configuration yaml file, returns
    dict of Galaxy plugins
    """
    # Try to open the URI as a URL or fall back to opening local file
    try:
        config_uri_parsed = urlparse(config_uri)
        if config_uri_parsed.scheme in ['https', 'http']:
            url = urlopen(config_uri)
            yaml_data = url.read()
        else:
            with open(config_uri, 'r') as file_data:
                yaml_data = file_data.read()
    except URLError as e:
        print(e)

    # Parse the YAML configuration
    try:
        plugin_data = yaml.safe_load(yaml_data)

        return plugin_data['plugins']
    except yaml.YAMLError as e:
        print(e) 
Example #2
Source File: pubchem.py    From QCElemental with BSD 3-Clause "New" or "Revised" License
def get_sdf(self):
        """Function to return the SDF (structure-data file) of the PubChem object."""
        from urllib.request import urlopen, Request
        from urllib.parse import quote
        from urllib.error import URLError

        if len(self.dataSDF) == 0:
            url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/SDF?record_type=3d".format(
                quote(str(self.cid))
            )
            req = Request(url, headers={"Accept": "chemical/x-mdl-sdfile"})
            try:
                self.dataSDF = urlopen(req).read().decode("utf-8")
            except URLError as e:
                msg = "Unable to open\n\n%s\n\ndue to the error\n\n%s\n\n" % (url, e)
                msg += "It is possible that 3D information does not exist for this molecule in the PubChem database\n"
                print(msg)
                raise ValidationError(msg)
        return self.dataSDF 
Example #3
Source File: download_tlds.py    From msticpy with MIT License
def _get_tlds() -> Optional[List[str]]:
    """
    Return IANA Top Level Domains.

    Returns
    -------
    Set[str]
        Set of top level domains.

    """
    try:
        req = request.Request(_TLD_LIST)
        with request.urlopen(req) as resp:  # nosec - Hard-coded URL
            txt_resp = resp.read().decode("utf-8", "ignore")
            tld_set = set(txt_resp.split("\n")[1:])  # get rid of header
            tld_set.remove("")  # get rid of blank values
            return sorted(tld_set)
    except (HTTPError, URLError) as err:
        warnings.warn(
            "Exception detected trying to retrieve IANA top-level domain list."
            + "Falling back to builtin seed list. "
            + f"{err.args}",
            RuntimeWarning,
        )
    return None 
Example #4
Source File: utils.py    From script.module.inputstreamhelper with MIT License
def _http_request(url, headers=None, time_out=10):
    """Perform an HTTP request and return request"""
    log(0, 'Request URL: {url}', url=url)

    try:
        if headers:
            request = Request(url, headers=headers)
        else:
            request = Request(url)
        req = urlopen(request, timeout=time_out)
        log(0, 'Response code: {code}', code=req.getcode())
        if 400 <= req.getcode() < 600:
            raise HTTPError('HTTP %s Error for url: %s' % (req.getcode(), url), response=req)
    except (HTTPError, URLError) as err:
        log(2, 'Download failed with error {}'.format(err))
        if yesno_dialog(localize(30004), '{line1}\n{line2}'.format(line1=localize(30063), line2=localize(30065))):  # Internet down, try again?
            return _http_request(url, headers, time_out)
        return None

    return req 
Example #5
Source File: tests.py    From kicad-schlib with Creative Commons Zero v1.0 Universal
def check_ds_link(url):
    if not url in datasheet_links:
        request = Request(url)
        request.get_method = lambda : 'HEAD'
        request.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0")
        try:
            response = urlopen(request, timeout=5)
            datasheet_links[url] = response.getcode()
        except URLError as e:
            datasheet_links[url] = getattr(e, 'code', str(e))
        except Exception as e:
            datasheet_links[url] = str(e)
        # Some 'special' hosts dont implement the HEAD method
        if datasheet_links[url] == 405:
            try:
                request.get_method = lambda : 'GET'
                response = urlopen(request, timeout=3)
                datasheet_links[url] = response.getcode()
            except URLError as e:
                datasheet_links[url] = getattr(e, 'code', str(e))
            except Exception as e:
                datasheet_links[url] = str(e)
    return datasheet_links[url] 
Example #6
Source File: fixtures.py    From pytest-flask with MIT License
def start(self):
        """Start application in a separate process."""
        def worker(app, host, port):
            app.run(host=host, port=port, use_reloader=False, threaded=True)
        self._process = multiprocessing.Process(
            target=worker,
            args=(self.app, self.host, self.port)
        )
        self._process.start()

        # We must wait for the server to start listening with a maximum
        # timeout of 5 seconds.
        timeout = 5
        while timeout > 0:
            time.sleep(1)
            try:
                urlopen(self.url())
                timeout = 0
            except URLError:
                timeout -= 1 
Example #7
Source File: github.py    From fusesoc with BSD 2-Clause "Simplified" License
def _checkout(self, local_dir):
        user = self.config.get("user")
        repo = self.config.get("repo")

        version = self.config.get("version", "master")

        # TODO : Sanitize URL
        url = URL.format(user=user, repo=repo, version=version)
        logger.info("Downloading {}/{} from github".format(user, repo))
        try:
            (filename, headers) = urllib.urlretrieve(url)
        except URLError as e:
            raise RuntimeError("Failed to download '{}'. '{}'".format(url, e.reason))
        t = tarfile.open(filename)
        (cache_root, core) = os.path.split(local_dir)

        # Ugly hack to get the first part of the directory name of the extracted files
        tmp = t.getnames()[0]
        t.extractall(cache_root)
        os.rename(os.path.join(cache_root, tmp), os.path.join(cache_root, core)) 
Example #8
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0
def validate_(self, value, context=None):
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if self.verify_exists:
            url_string = urlquote(urlunsplit((
                url['scheme'],
                (url['host6'] or url['host4'] or url['hostn_enc']) + ':' + (url['port'] or ''),
                url['path'],
                url['query'],
                url['frag'])
                ).encode('utf-8'), safe=VALID_CHAR_STRING)
            try:
                urlopen(url_string)
            except URLError:
                raise StopValidationError(self.messages['not_found']) 
Example #9
Source File: net.py    From misp42splunk with GNU Lesser General Public License v3.0
def validate_(self, value, context=None):
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if self.verify_exists:
            url_string = urlquote(urlunsplit((
                url['scheme'],
                (url['host6'] or url['host4'] or url['hostn_enc']) + ':' + (url['port'] or ''),
                url['path'],
                url['query'],
                url['frag'])
                ).encode('utf-8'), safe=VALID_CHAR_STRING)
            try:
                urlopen(url_string)
            except URLError:
                raise StopValidationError(self.messages['not_found']) 
Example #10
Source File: cloudfrunt.py    From cloudfrunt with MIT License
def find_cf_issues(domains):

    error_domains = []

    for domain in domains:
        try:
            response = urlopen('http://' + domain)
        except HTTPError as e:
            if e.code == 403 and 'Bad request' in e.fp.read():
                try:
                    response = urlopen('https://' + domain)
                except URLError as e:
                    if 'handshake' in str(e).lower() or e.code == 403 and 'Bad request' in e.fp.read():
                        error_domains.append(domain)
                except:
                    pass
        except:
            pass

    return error_domains

# add a domain to CloudFront 
Example #11
Source File: _utils.py    From torf with GNU General Public License v3.0
def download_http(url, timeout=60):
    try:
        response = urllib.request.urlopen(URL(url), timeout=timeout).read()
    except urllib.error.URLError as e:
        try:
            msg = e.args[0].strerror
        except (AttributeError, IndexError):
            msg = (getattr(e, 'msg', None) or
                   getattr(e, 'strerror', None) or
                   'Failed')
        raise error.ConnectionError(url, msg)
    except socket.timeout:
        raise error.ConnectionError(url, 'Timed out')
    except http.client.HTTPException:
        raise error.ConnectionError(url, 'No HTTP response')
    except (OSError, IOError):
        raise error.ConnectionError(url, 'Unknown error')
    else:
        return response 
Example #12
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getCountry(ipAddress):
    '''
    Determine the country where an IP address is located.
    '''
    try:
        response = urlopen("http://freegeoip.net/json/" +
                           ipAddress).read().decode('utf-8')
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        response = urlopen("http://freegeoip.net/json/" +
                           ipAddress).read().decode('utf-8')
    except:
        return 'Unknown'
    responseJson = json.loads(response)
    return responseJson.get("country_code")  # return the country code
Example #13
Source File: cloudfrunt.py    From cloudfrunt with MIT License
def get_cf_ranges(cf_url):

    response = None
    ranges = []

    while response is None:
        try:
            response = urlopen(cf_url)
        except URLError as e:
            print(' [?] Got URLError trying to get CloudFront IP ranges. Retrying...')
        except:
            print(' [?] Got an unexpected error trying to get CloudFront IP ranges. Exiting...')
            raise

    cf_data = json.load(response)
    for item in cf_data['prefixes']:
        service = item.get('service')
        if service == 'CLOUDFRONT':
            ranges.append(item.get('ip_prefix'))

    return ranges

# find more domains and correct for CloudFront 
Example #14
Source File: Coordinate.py    From crawler_examples with Apache License 2.0
def getLinks(articleUrl):
    '''
    Parse the page source and return its internal links.
    '''
    try:
        html = urlopen("http://en.wikipedia.org" + articleUrl)
    except HTTPError:
        return None
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen("http://en.wikipedia.org" + articleUrl)
    bsObj = BeautifulSoup(html, "lxml")
    return bsObj.find("div", {"id": "bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))


# Set up the buffer queue
Example #15
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getHistoryIPs(pageUrl):
    pageUrl = pageUrl.replace("/wiki/", "")
    historyUrl = "http://en.wikipedia.org/w/index.php?title="+pageUrl+"&action=history"
    print("history url:", historyUrl)

    time.sleep(SLEEP_TIME)

    try:
        html = urlopen(historyUrl)
    except HTTPError:
        return None
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen(historyUrl)
    bsObj = BeautifulSoup(html, "lxml")
    ipAddresses = bsObj.findAll("a", {"class":"mw-anonuserlink"})

    addressList = set()
    for ipAddress in ipAddresses:
        print(pageUrl+": "+ipAddress.get_text())
        addressList.add(ipAddress.get_text())
    return addressList  # return the list of IP addresses

# Get the country code for every IP
Example #16
Source File: RunSpider.py    From crawler_examples with Apache License 2.0
def getLinks(articleUrl):
    try:
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    except HTTPError:
        ServerLog.writeLog("HTTPError")
        return None
    except URLError:
        ServerLog.writeLog("URLError")
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    bsObj = BeautifulSoup(html, "lxml")
    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))


# Crawl IP addresses
Example #17
Source File: integration_tools.py    From sawtooth-core with Apache License 2.0
def wait_until_status(url, status_code=200):
    """Pause the program until the given url returns the required status.

    Args:
        url (str): The url to query.
        status_code (int, optional): The required status code. Defaults to 200.
    """
    sleep_time = 1
    while True:
        try:
            response = urlopen(url)
            if response.getcode() == status_code:
                return

        except HTTPError as err:
            if err.code == status_code:
                return

            LOGGER.debug('failed to read url: %s', str(err))
        except URLError as err:
            LOGGER.debug('failed to read url: %s', str(err))

        LOGGER.debug('Retrying in %s secs', sleep_time)
        time.sleep(sleep_time) 
Example #18
Source File: _datasource.py    From auto-alt-text-lambda-api with MIT License
def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        if not os.path.exists(os.path.dirname(upath)):
            os.makedirs(os.path.dirname(upath))

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                f = _open(upath, 'wb')
                try:
                    shutil.copyfileobj(openedurl, f)
                finally:
                    f.close()
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath 
Example #19
Source File: _datasource.py    From vnpy_crypto with MIT License
def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        if not os.path.exists(os.path.dirname(upath)):
            os.makedirs(os.path.dirname(upath))

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                f = _open(upath, 'wb')
                try:
                    shutil.copyfileobj(openedurl, f)
                finally:
                    f.close()
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath 
Example #20
Source File: _datasource.py    From lambda-packs with MIT License
def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        if not os.path.exists(os.path.dirname(upath)):
            os.makedirs(os.path.dirname(upath))

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                f = _open(upath, 'wb')
                try:
                    shutil.copyfileobj(openedurl, f)
                finally:
                    f.close()
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath 
Example #21
Source File: spider.py    From crawler_examples with Apache License 2.0
def getBsObj(url):
    """
    Fetch the URL and return a BeautifulSoup object.
    """
    try:
        html = urlopen(url)
    except URLError:
        print("Sleeping!")
        time.sleep(SLEEP_TIME)
        html = urlopen(url)
    except:
        return None

    bsObj = BeautifulSoup(html, 'lxml')
    return bsObj 
Example #22
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getHistoryIPs(pageUrl):
    '''
    Extract contributors' IP addresses from the page.
    '''
    pageUrl = pageUrl.replace("/wiki/", "")
    historyUrl = "http://en.wikipedia.org/w/index.php?title=" + \
        pageUrl + "&action=history"
    print("history url:", historyUrl)

    time.sleep(SLEEP_TIME)

    try:
        html = urlopen(historyUrl)
    except HTTPError:
        return None
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen(historyUrl)
    bsObj = BeautifulSoup(html, "lxml")
    ipAddresses = bsObj.findAll("a", {"class": "mw-anonuserlink"})

    addressList = set()
    for ipAddress in ipAddresses:
        print(pageUrl + ": " + ipAddress.get_text())
        addressList.add(ipAddress.get_text())
    return addressList  # return the list of IP addresses
Example #23
Source File: domain_utils.py    From msticpy with MIT License
def _get_ssl_abuselist(cls) -> pd.DataFrame:
        """Download and load abuse.ch SSL Abuse List."""
        try:
            ssl_ab_list = pd.read_csv(
                "https://sslbl.abuse.ch/blacklist/sslblacklist.csv", skiprows=8
            )
        except (ConnectionError, HTTPError, URLError):
            ssl_ab_list = pd.DataFrame({"SHA1": []})

        return ssl_ab_list 
Example #24
Source File: RunServer.py    From crawler_examples with Apache License 2.0
def getLinks(articleUrl):
    try:
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    except HTTPError:
        ServerLog.writeLog("HTTPError")
        return None
    except URLError:
        ServerLog.writeLog("URLError")
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        html = urlopen("http://en.wikipedia.org"+articleUrl)
    bsObj = BeautifulSoup(html, "lxml")
    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))

# Set up the pages
Example #25
Source File: afl_stats.py    From afl-utils with Apache License 2.0
def twitter_init(config):
    try:
        config['twitter_creds_file'] = os.path.abspath(os.path.expanduser(config['twitter_creds_file']))
        if not os.path.exists(config['twitter_creds_file']):
            twitter.oauth_dance("fuzzer_stats", config['twitter_consumer_key'],
                                config['twitter_consumer_secret'], config['twitter_creds_file'])
        oauth_token, oauth_secret = twitter.read_token_file(config['twitter_creds_file'])
        twitter_instance = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_secret,
                                                              config['twitter_consumer_key'],
                                                              config['twitter_consumer_secret']))
        return twitter_instance
    except (twitter.TwitterHTTPError, URLError):
        print_err("Network error, twitter login failed! Check your connection!")
        sys.exit(1) 
Example #26
Source File: GetLocal.py    From crawler_examples with Apache License 2.0
def getCountry(ipAddress):
    try:
        response = urlopen("http://freegeoip.net/json/"+ipAddress).read().decode('utf-8')
    except URLError:
        print("Sleeping!")
        time.sleep(URLERROR_SLEEP_TIME)
        response = urlopen("http://freegeoip.net/json/"+ipAddress).read().decode('utf-8')
    except:
        return 'Unknown'
    responseJson = json.loads(response)
    return responseJson.get("country_code") # return the country code

# Extract contributors' IP addresses from the page
Example #27
Source File: afl_stats.py    From afl-utils with Apache License 2.0
def fetch_stats(config_settings, twitter_inst):
    stat_dict = dict()
    for fuzzer in config_settings['fuzz_dirs']:
        stats = load_stats(fuzzer)

        if not stats:
            continue

        sum_stats = summarize_stats(stats)

        try:
            with open('.afl_stats.{}'.format(os.path.basename(fuzzer)), 'r') as f:
                old_stats = json.load(f)
        except FileNotFoundError:
            old_stats = sum_stats.copy()

        # initialize/update stat_dict
        stat_dict[fuzzer] = (sum_stats, old_stats)

        stat_change = diff_stats(sum_stats, old_stats)

        with open('.afl_stats.{}'.format(os.path.basename(fuzzer)), 'w') as f:
            json.dump(sum_stats, f)

        print(prettify_stat(sum_stats, stat_change, True))

        tweet = prettify_stat(sum_stats, stat_change, False)

        l = len(tweet)
        c = clr.LRD if l > 140 else clr.LGN

        if twitter_inst:
            print_ok("Tweeting status (%s%d" % (c, l) + clr.RST + " chars)...")
            try:
                twitter_inst.statuses.update(status=shorten_tweet(tweet))
            except (twitter.TwitterHTTPError, URLError):
                print_warn("Problem connecting to Twitter! Tweet not sent!")
            except Exception as e:
                print_err("Sending tweet failed (Reason: " + clr.GRA + "%s" % e.__cause__ + clr.RST + ")") 
Example #28
Source File: bigsuds.py    From bigsuds with MIT License
def _create_client(self, wsdl_name):
        try:
            client = get_client(self._hostname, wsdl_name, self._username,
                                self._password, self._cachedir, self._verify,
                                self._timeout,self._port)
        except SAXParseException as e:
            raise ParseError('%s\nFailed to parse wsdl. Is "%s" a valid '
                    'namespace?' % (e, wsdl_name))
        # One situation that raises TransportError is when credentials are bad.
        except (URLError, TransportError) as e:
            raise ConnectionError(str(e))
        return self._create_client_wrapper(client, wsdl_name) 
Example #29
Source File: methods.py    From cryptolens-python with MIT License
def get_key(token, rsa_pub_key, product_id, key, fields_to_return = 0,\
                 metadata = False, floating_time_interval = 0):
        
        """
        Calls the GetKey method in Web API 3 and returns a tuple containing
        (LicenseKey, Message). If an error occurs, LicenseKey will be None. If
        everything went well, no message will be returned.
        
        More docs: https://app.cryptolens.io/docs/api/v3/GetKey
        """
        
        response = Response("","",0,"")
        
        try:
            response = Response.from_string(HelperMethods.send_request("key/getkey", {"token":token,\
                                                  "ProductId":product_id,\
                                                  "key":key,\
                                                  "FieldsToReturn":fields_to_return,\
                                                  "metadata":metadata,\
                                                  "FloatingTimeInterval": floating_time_interval,\
                                                  "Sign":"True",\
                                                  "SignMethod":1}))
        except HTTPError as e:
            response = Response.from_string(e.read())
        except URLError as e:
            return (None, "Could not contact the server. Error message: " + str(e))
        except Exception:
            return (None, "Could not contact the server.")
        
        pubkey = RSAPublicKey.from_string(rsa_pub_key)
    
        if response.result == 1:
            return (None, response.message)
        else:
            try:
                if HelperMethods.verify_signature(response, pubkey):
                    return (LicenseKey.from_response(response), response.message)
                else:
                    return (None, "The signature check failed.")
            except Exception:
                return (None, "The signature check failed.") 
Example #30
Source File: test__datasource.py    From lambda-packs with MIT License
def test_InvalidHTTPCacheURLError(self):
        self.assertRaises(URLError, self.ds._cache, invalid_httpurl())