Python urllib.parse.unquote() Examples

The following are 26 code examples of urllib.parse.unquote(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the urllib.parse module, or try the search function.
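Before the project examples, a minimal standalone demonstration of the function's behavior (standard library only):

from urllib.parse import unquote, unquote_plus

print(unquote('/El%20Ni%C3%B1o/'))   # /El Niño/  - %XX escapes decode as UTF-8 by default
print(unquote('a+b'))                # a+b        - unquote() leaves '+' untouched
print(unquote_plus('a+b'))           # a b        - unquote_plus() also turns '+' into a space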
Example #1
Source File: elastic.py From ivre with GNU General Public License v3.0
def __init__(self, url):
    super(ElasticDB, self).__init__()
    self.username = ''
    self.password = ''
    self.hosts = None
    if '@' in url.netloc:
        username, hostname = url.netloc.split('@', 1)
        if ':' in username:
            self.username, self.password = (unquote(val) for val in
                                            username.split(':', 1))
        else:
            self.username = unquote(username)
        if hostname:
            self.hosts = [hostname]
    elif url.netloc:
        self.hosts = [url.netloc]
    index_prefix = url.path.lstrip('/')
    if index_prefix:
        self.index_prefix = index_prefix + '-'
    else:
        self.index_prefix = 'ivre-'
    self.params = dict(x.split('=', 1) if '=' in x else (x, None)
                       for x in url.query.split('&') if x)
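The credential handling above can be reproduced in isolation; the sketch below uses a made-up URL and mirrors the same split-then-unquote steps:

from urllib.parse import urlparse, unquote

url = urlparse('http://admin:s%40cret@localhost:9200/myindex')  # hypothetical URL
userinfo, hostname = url.netloc.split('@', 1)
username, password = (unquote(val) for val in userinfo.split(':', 1))
print(username, password, hostname)  # admin s@cret localhost:9200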
Example #2
Source File: server.py From RPGBot with GNU General Public License v3.0
async def getguild(self, request: web.Request):
    guild = int(request.match_info['guild'])
    req = f"""SELECT info FROM guilddata WHERE UUID = $1"""
    async with self.bot.db._conn.acquire() as connection:
        response = await connection.fetchval(req, guild)

    if response:
        data = json.loads(response)
        fdata = data
        if request.match_info['tail']:
            for item in request.match_info['tail'].split("/"):
                if not item:
                    continue
                try:
                    key = unquote(item)
                    if isinstance(fdata, list):
                        key = int(key)
                    fdata = fdata[key]
                except (KeyError, IndexError, ValueError, TypeError):
                    # Path segment does not resolve into the stored JSON
                    raise web.HTTPNotFound()
        return web.json_response(fdata)
    raise web.HTTPForbidden()

# @server.route("/", methods=["GET"])
Example #3
Source File: http_server.py From avocado-vt with GNU General Public License v2.0
def translate_path(self, path):
    """
    Translate a /-separated PATH to the local filename syntax.

    Components that mean special things to the local file system
    (e.g. drive or directory names) are ignored.  (XXX They should
    probably be diagnosed.)
    """
    # abandon query parameters
    path = urlparse(to_text(path))[2]
    path = posixpath.normpath(unquote(path))
    words = path.split('/')
    words = list(filter(None, words))
    path = self.server.cwd
    for word in words:
        _, word = os.path.splitdrive(word)
        _, word = os.path.split(word)
        if word in (os.curdir, os.pardir):
            continue
        path = os.path.join(path, word)
    return path
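The decode-before-sanitize order is the point here: unquote() can reintroduce '..' and '/' characters that were hidden behind %XX escapes, which is why the components are filtered afterwards. A minimal illustration:

import posixpath
from urllib.parse import unquote

# An encoded '..' reappears only after decoding
print(posixpath.normpath(unquote('/docs/%2e%2e/secret.txt')))  # /secret.txt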
Example #4
Source File: __init__.py From Wikipedia-API with MIT License
def article(
        self,
        title: str,
        ns: WikiNamespace = Namespace.MAIN,
        unquote: bool = False
) -> 'WikipediaPage':
    """
    Constructs Wikipedia page with title `title`.

    This function is an alias for :func:`page`

    :param title: page title as used in Wikipedia URL
    :param ns: :class:`WikiNamespace`
    :param unquote: if true it will unquote title
    :return: object representing :class:`WikipediaPage`
    """
    return self.page(
        title=title,
        ns=ns,
        unquote=unquote,
    )
Example #5
Source File: epr.py From epr with MIT License
def handle_starttag(self, tag, attrs):
    if re.match("h[1-6]", tag) is not None:
        self.ishead = True
    elif tag in self.inde:
        self.isinde = True
    elif tag in self.pref:
        self.ispref = True
    elif tag in self.bull:
        self.isbull = True
    elif tag in self.hide:
        self.ishidden = True
    elif tag == "sup":
        self.text[-1] += "^{"
    elif tag == "sub":
        self.text[-1] += "_{"
    elif tag == "image":
        for i in attrs:
            if i[0] == "xlink:href":
                self.text.append("[IMG:{}]".format(len(self.imgs)))
                self.imgs.append(unquote(i[1]))
Example #6
Source File: cache.py From openSUSE-release-tools with GNU General Public License v2.0
def put(url, data):
    url = unquote(url)
    match, project = Cache.match(url)
    if match:
        path = Cache.path(url, project, include_file=True, makedirs=True)
        ttl = Cache.PATTERNS[match]
        if ttl == 0:
            return data

        # Since urlopen does not return a seekable stream it cannot be reset
        # after writing to cache. As such a wrapper must be used. This could
        # be replaced with urlopen('file://...') to be consistent, but until
        # the need arises BytesIO has less overhead.
        text = data.read()
        data = BytesIO(text)

        if conf.config['debug']:
            print('CACHE_PUT', url, project, file=sys.stderr)
        f = open(path, 'wb')
        f.write(text)
        f.close()

    return data
Example #7
Source File: read_epub.py From Lector with GNU General Public License v3.0
def find_file(self, filename):
    # Get rid of special characters
    filename = unquote(filename)

    # First, look for the file in the root of the book
    if filename in self.file_list:
        return filename

    # Then search for it elsewhere
    else:
        file_basename = os.path.basename(filename)
        for i in self.file_list:
            if os.path.basename(i) == file_basename:
                return i

    # If the file isn't found
    logger.warning(filename + ' not found in ' + self.book_filename)
    return False
Example #8
Source File: test_wallet.py From pyqiwi with MIT License
def test_form_link(self):
    data = {
        'pid': 99,
        'account': 79000000000,
        'amount': 123,
        'comment': 'Hey, it works!'
    }
    paylink = pyqiwi.generate_form_link(**data)
    result = url_params(unquote(paylink))
    data.pop('pid')  # It is not in the params; it's in the URL
    # Qiwi requires the amount to be split into integer and fraction
    data = merge_dicts(data, split_float(data.get('amount')))
    data.pop('amount')
    # unquote() won't convert '+' to a space, but Qiwi should
    if result.get("extra['comment']"):
        result["extra['comment']"] = result["extra['comment']"].replace('+', ' ')
    for key in data:
        if key == 'account':
            assert result["extra['account']"] == str(data[key])
        elif key == 'comment':
            assert result["extra['comment']"] == str(data[key])
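The manual '+' replacement in this test exists because unquote() does not map '+' to a space while unquote_plus() does. A standard-library-only check:

from urllib.parse import unquote, unquote_plus

print(unquote('Hey%2C+it+works%21'))       # Hey,+it+works! - '+' survives
print(unquote_plus('Hey%2C+it+works%21'))  # Hey, it works! - '+' becomes a space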
Example #9
Source File: headers.py From sanic with MIT License
def fwd_normalize(fwd: OptionsIterable) -> Options:
    """Normalize and convert values extracted from forwarded headers."""
    ret: Dict[str, Union[int, str]] = {}
    for key, val in fwd:
        if val is not None:
            try:
                if key in ("by", "for"):
                    ret[key] = fwd_normalize_address(val)
                elif key in ("host", "proto"):
                    ret[key] = val.lower()
                elif key == "port":
                    ret[key] = int(val)
                elif key == "path":
                    ret[key] = unquote(val)
                else:
                    ret[key] = val
            except ValueError:
                pass
    return ret
Example #10
Source File: test_response.py From sanic with MIT License
def test_file_response_custom_filename(
    app, source, dest, static_file_directory
):
    @app.route("/files/<filename>", methods=["GET"])
    def file_route(request, filename):
        file_path = os.path.join(static_file_directory, filename)
        file_path = os.path.abspath(unquote(file_path))
        return file(file_path, filename=dest)

    request, response = app.test_client.get(f"/files/{source}")
    assert response.status == 200
    assert response.body == get_file_content(static_file_directory, source)
    assert (
        response.headers["Content-Disposition"]
        == f'attachment; filename="{dest}"'
    )
Example #11
Source File: cache.py From openSUSE-release-tools with GNU General Public License v2.0
def delete(url):
    url = unquote(url)
    match, project = Cache.match(url)
    if match:
        path = Cache.path(url, project, include_file=True)

        # Rather than wait for last updated statistics to expire, remove the
        # project cache if applicable.
        if project:
            apiurl, _ = Cache.spliturl(url)
            if project.isdigit():
                # Clear target project cache upon request acceptance.
                project = osc.core.get_request(apiurl, project).actions[0].tgt_project
            Cache.delete_project(apiurl, project)

        if os.path.exists(path):
            if conf.config['debug']:
                print('CACHE_DELETE', url, file=sys.stderr)
            os.remove(path)

    # Also delete version without query. This does not handle other
    # variations using different query strings. Handy for PUT with ?force=1.
    o = urlsplit(url)
    if o.query != '':
        url_plain = SplitResult(o.scheme, o.netloc, o.path, '', o.fragment).geturl()
        Cache.delete(url_plain)
Example #12
Source File: test_response.py From sanic with MIT License
def test_file_stream_response_custom_filename(
    app, source, dest, static_file_directory
):
    @app.route("/files/<filename>", methods=["GET"])
    def file_route(request, filename):
        file_path = os.path.join(static_file_directory, filename)
        file_path = os.path.abspath(unquote(file_path))
        return file_stream(file_path, chunk_size=32, filename=dest)

    request, response = app.test_client.get(f"/files/{source}")
    assert response.status == 200
    assert response.body == get_file_content(static_file_directory, source)
    assert (
        response.headers["Content-Disposition"]
        == f'attachment; filename="{dest}"'
    )
Example #13
Source File: compatibility_utils.py From Lector with GNU General Public License v3.0
def unquoteurl(href):
    if isinstance(href, binary_type):
        href = href.decode('utf-8')
    href = unquote(href)
    return href
Example #14
Source File: no_notebook.py From vpython-jupyter with MIT License
def do_GET(self):
    global httpserving
    httpserving = True
    html = False
    if self.path == "/":
        self.path = 'glowcomm.html'
        html = True
    elif self.path[0] == "/":
        self.path = os.sep + self.path[1:]
    f = self.path.rfind('.')
    fext = None
    if f > 0:
        fext = self.path[f + 1:]
    if fext in self.mimes:
        mime = self.mimes[fext]
        # For example, mime[0] is image/jpg,
        # mime[1] is C:\Users\Bruce\Anaconda3\lib\site-packages\vpython\vpython_data
        self.send_response(200)
        self.send_header('Content-type', mime[0])
        self.end_headers()
        if not html:
            path = unquote(self.path)  # convert %20 to space, for example
            # Now path can be for example \Fig 4.6.jpg
            # Use the current working directory, e.g. D:\Documents\0GlowScriptWork\LocalServer
            cwd = os.getcwd()
            loc = cwd + path
            if not os.path.isfile(loc):
                loc = mime[1] + path  # look in vpython_data
            fd = open(loc, 'rb')
            self.wfile.write(fd.read())
        else:
            # string.encode() is not available in Python 2.7, but neither is async
            self.wfile.write(glowcomm.encode('utf-8'))
Example #15
Source File: Struts2Scan.py From Struts2-Scan with GNU General Public License v3.0
def exec_cmd(self, cmd):
    """Execute a command."""
    payload = self.exec_payload.format(cmd=quote(cmd))
    html = get(self.url + "{payload}.action".format(payload=payload), self.headers, self.encoding)
    if html.startswith('ERROR:'):
        return html
    try:
        soup = BeautifulSoup(html, 'lxml')
        ps = soup.find_all('p')
        result = unquote(ps[1].text[9:-4]).strip()
        return result
    except Exception:
        return html
Example #16
Source File: api.py From baidupcsapi with MIT License
def share_dlink_for_fs_ids(self, fsid_list, shareid, uk, sign):
    # TODO: needs documentation
    url = "https://pan.baidu.com/api/sharedownload"
    sekey = json.dumps({"sekey": urlparse.unquote(self.session.cookies["BDCLND"])})
    data = {
        "encrypt": 0,
        "extra": sekey,
        "uk": uk,
        "primaryid": shareid,
        "product": "share",
        "fid_list": json.dumps(fsid_list),
    }
    reqheader = {"Referer": "https://pan.baidu.com/share/link?shareid=" + shareid + "&uk=" + uk}
    resp = self._request(None, data=data,
                         extra_params={"sign": sign, "clienttype": 0, "timestamp": int(time.time())},
                         url=url, headers=reqheader)
    return resp
Example #17
Source File: SougouSpider.py From Sougou_dict_spider with MIT License
def GetDownloadList(self, resp):
    """Get the download links."""
    downloadUrls = {}
    pattern = re.compile(r'name=(.*)')
    soup = BeautifulSoup(resp.text, "html.parser")
    dict_dl_lists = soup.find_all("div", class_="dict_dl_btn")
    for dict_dl_list in dict_dl_lists:
        dict_dl_url = dict_dl_list.a['href']
        dict_name = pattern.findall(dict_dl_url)[0]
        dict_ch_name = unquote(dict_name, 'utf-8').replace("/", "-").replace(",", "-").replace("|", "-") \
            .replace("\\", "-").replace("'", "-")
        downloadUrls[dict_ch_name] = dict_dl_url
    return downloadUrls
Example #18
Source File: utils.py From a4kScrapers with MIT License
def normalize(string):
    unescaped = unescape(string)
    unquoted = unquote(unescaped)
    return unicodedata.normalize("NFKD", unquoted).replace('\n', '')
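A quick trace of the pipeline on a made-up scraper string, assuming unescape is html.unescape: HTML entities are resolved first, then percent-escapes, then the result is Unicode-normalized and stripped of newlines:

import unicodedata
from html import unescape
from urllib.parse import unquote

s = 'Caf%C3%A9&amp;%0ABar'  # hypothetical input
print(unicodedata.normalize("NFKD", unquote(unescape(s))).replace('\n', ''))  # Café&Bar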
Example #19
Source File: requirements.py From pdm with MIT License
def _parse_name_from_url(self) -> None:
    parsed = urlparse.urlparse(self.url)
    fragments = dict(urlparse.parse_qsl(parsed.fragment))
    if "egg" in fragments:
        egg_info = urlparse.unquote(fragments["egg"])
        name, extras = strip_extras(egg_info)
        self.name = name
        self.extras = extras
    if not self.name:
        filename = os.path.basename(url_without_fragments(self.url))
        if filename.endswith(".whl"):
            self.name, self.version = parse_name_version_from_wheel(filename)
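The egg-fragment branch can be reproduced with the standard library alone; the URL below is illustrative:

from urllib.parse import urlparse, parse_qsl, unquote

url = 'git+https://github.com/user/project.git#egg=my-package'  # hypothetical URL
fragments = dict(parse_qsl(urlparse(url).fragment))
print(unquote(fragments['egg']))  # my-package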
Example #20
Source File: replica.py From rucio with Apache License 2.0
def GET(self):
    """
    Return a summary of the bad replicas by incident.

    HTTP Success:
        200 OK

    HTTP Error:
        406 Not Acceptable
        500 InternalError
    """
    header('Content-Type', 'application/x-json-stream')
    result = []
    rse_expression, from_date, to_date = None, None, None
    if ctx.query:
        try:
            params = loads(unquote(ctx.query[1:]))
        except ValueError:
            params = parse_qs(ctx.query[1:])
        if 'rse_expression' in params:
            rse_expression = params['rse_expression'][0]
        if 'from_date' in params and params['from_date'][0]:
            from_date = datetime.strptime(params['from_date'][0], "%Y-%m-%d")
        if 'to_date' in params:
            to_date = datetime.strptime(params['to_date'][0], "%Y-%m-%d")

    try:
        result = get_bad_replicas_summary(rse_expression=rse_expression,
                                          from_date=from_date, to_date=to_date)
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)

    for row in result:
        yield dumps(row, cls=APIEncoder) + '\n'
Example #21
Source File: pan.py From baidu-wangpan-parse with MIT License
def get_resp_json(self, need_verify=False):
    url = 'http://pan.baidu.com/api/sharedownload'
    payload = {
        'sign': self.sign,
        'timestamp': self.timestamp,
        'bdstoken': 'null',
        'channel': 'chunlei',
        'clienttype': '0',
        'web': '1',
        'app_id': '250528',
    }
    data = {
        'encrypt': '0',
        'product': 'share',
        'type': 'nolimit',
        'uk': self.uk,
        'primaryid': self.primary_id,
        'fid_list': self.fid_list,
    }
    if self.is_folder:
        data['type'] = 'batch'
    if self.is_encrypt:
        data['extra'] = '{"sekey":"' + parse.unquote(self.sess.cookies['BDCLND']) + '"}'
    if need_verify:
        data['vcode_input'] = self.verify_code_input
        data['vcode_str'] = self.verify_code_str
    resp = self.sess.post(
        url=url,
        params=payload,
        data=data,
        headers=self.headers
    )
    return json.loads(resp.text)
Example #22
Source File: utils.py From insightconnect-plugins with MIT License
def decode_url(url: str) -> str:
    url_split = url.split("?")
    split_url_params = url_split[1].split("&")
    for param in split_url_params:
        if param.split("=")[0] == "url":
            return unquote(param.split("=")[1])
    return url
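Tracing the helper on a hypothetical redirect-style link:

# Hypothetical proxy link for illustration
link = 'https://proxy.example.com/redirect?foo=1&url=https%3A%2F%2Fexample.com%2Fpage'
print(decode_url(link))  # https://example.com/page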
Example #23
Source File: gcs.py From lm-human-preferences with MIT License
def parse_url(url):
    """Given a gs:// path, returns bucket name and blob path."""
    result = urlparse(url)
    if result.scheme == 'gs':
        return result.netloc, unquote(result.path.lstrip('/'))
    elif result.scheme == 'https':
        assert result.netloc == 'storage.googleapis.com'
        bucket, rest = result.path.lstrip('/').split('/', 1)
        return bucket, unquote(rest)
    else:
        raise Exception(f'Could not parse {url} as gcs url')
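Example calls against the function above (bucket and object names are made up); both URL forms resolve to the same decoded pair:

print(parse_url('gs://my-bucket/models/run%201/params.json'))
# ('my-bucket', 'models/run 1/params.json')
print(parse_url('https://storage.googleapis.com/my-bucket/models/run%201/params.json'))
# ('my-bucket', 'models/run 1/params.json')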
Example #24
Source File: replica.py From rucio with Apache License 2.0
def GET(self, scope, name):
    """
    List dataset replicas for a DID (scope:name) using the
    Virtual Placement service.

    NOTICE: This is an RnD function and might change or go away at any time.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    :returns: If VP exists, a list of dicts of sites; otherwise nothing.
    """
    header('Content-Type', 'application/x-json-stream')
    deep = False
    if ctx.query:
        try:
            params = loads(unquote(ctx.query[1:]))
        except ValueError:
            params = parse_qs(ctx.query[1:])
        if 'deep' in params:
            deep = params['deep'][0]

    try:
        for row in list_dataset_replicas_vp(scope=scope, name=name, deep=deep):
            yield dumps(row, cls=APIEncoder) + '\n'
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
Example #25
Source File: replica.py From rucio with Apache License 2.0
def GET(self, scope, name):
    """
    List dataset replicas.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    :returns: A dictionary containing all replicas information.
    """
    header('Content-Type', 'application/x-json-stream')
    deep = False
    if ctx.query:
        try:
            params = loads(unquote(ctx.query[1:]))
        except ValueError:
            params = parse_qs(ctx.query[1:])
        if 'deep' in params:
            deep = params['deep'][0]

    try:
        for row in list_dataset_replicas(scope=scope, name=name, deep=deep):
            yield dumps(row, cls=APIEncoder) + '\n'
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
Example #26
Source File: fixers.py From jbox with MIT License
def __call__(self, environ, start_response):
    for key in 'REQUEST_URL', 'REQUEST_URI', 'UNENCODED_URL':
        if key not in environ:
            continue
        request_uri = unquote(environ[key])
        script_name = unquote(environ.get('SCRIPT_NAME', ''))
        if request_uri.startswith(script_name):
            environ['PATH_INFO'] = request_uri[len(script_name):] \
                .split('?', 1)[0]
            break
    return self.app(environ, start_response)
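The PATH_INFO derivation can be exercised in isolation with a hand-built environ; the values below are made up:

from urllib.parse import unquote

environ = {'REQUEST_URI': '/myapp/some%20page?x=1', 'SCRIPT_NAME': '/myapp'}  # hypothetical
request_uri = unquote(environ['REQUEST_URI'])
script_name = unquote(environ.get('SCRIPT_NAME', ''))
print(request_uri[len(script_name):].split('?', 1)[0])  # /some page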