Python six.moves.urllib_parse.unquote() Examples

The following are 28 code examples of six.moves.urllib_parse.unquote(), taken from open-source projects. The source file, originating project, and license are noted above each example.
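As a quick orientation, unquote() replaces %XX escape sequences with the characters they encode. A minimal standalone sketch of its behaviour under six (the inputs are made up for illustration):

from six.moves.urllib_parse import unquote

# '%20' becomes a space, '%2F' becomes '/', and so on.
print(unquote('foo%20bar%2Fbaz'))   # foo bar/baz

# On Python 3, percent-escaped UTF-8 is decoded to text by default;
# on Python 2 the same call returns the raw UTF-8 bytes, which is why
# several examples below decode the result explicitly.
print(unquote('caf%C3%A9'))         # café on Python 3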
Example #1
Source File: googlevideo.py    From script.module.urlresolver with GNU General Public License v2.0
def __extract_video(self, item):
        sources = []
        for e in item:
            if isinstance(e, dict):
                for key in e:
                    for item2 in e[key]:
                        if isinstance(item2, list):
                            for item3 in item2:
                                if isinstance(item3, list):
                                    for item4 in item3:
                                        if isinstance(item4, six.text_type) and six.PY2:  # @big change
                                            item4 = item4.encode('utf-8')
                                        if isinstance(item4, six.string_types) and six.PY2:  # @big change
                                            item4 = urllib_parse.unquote(item4).decode('unicode_escape')
                                            for match in re.finditer('url=(?P<link>[^&]+).*?&itag=(?P<itag>[^&]+)', item4):
                                                link = match.group('link')
                                                itag = match.group('itag')
                                                quality = self.itag_map.get(itag, 'Unknown Quality [%s]' % itag)
                                                sources.append((quality, link))
                                            if sources:
                                                return sources
        return sources 
Example #2
Source File: googlevideo.py    From script.module.urlresolver with GNU General Public License v2.0
def _parse_gdocs(self, html):
        urls = []
        for match in re.finditer(r'\[\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\]', html):
            key, value = match.groups()
            if key == 'fmt_stream_map':
                items = value.split(',')
                for item in items:
                    _source_itag, source_url = item.split('|')
                    if isinstance(source_url, six.text_type) and six.PY2:  # @big change
                        source_url = source_url.encode('utf-8')
                    source_url = source_url.decode('unicode_escape')
                    quality = self.itag_map.get(_source_itag, 'Unknown Quality [%s]' % _source_itag)
                    source_url = urllib_parse.unquote(source_url)
                    urls.append((quality, source_url))
                return urls

        return urls 
Example #3
Source File: batch.py    From apitools with Apache License 2.0
def _ConvertHeaderToId(header):
        """Convert a Content-ID header value to an id.

        Presumes the Content-ID header conforms to the format that
        _ConvertIdToHeader() returns.

        Args:
          header: A string indicating the Content-ID header value.

        Returns:
          The extracted id value.

        Raises:
          BatchError if the header is not in the expected format.
        """
        if not (header.startswith('<') or header.endswith('>')):
            raise exceptions.BatchError(
                'Invalid value for Content-ID: %s' % header)
        if '+' not in header:
            raise exceptions.BatchError(
                'Invalid value for Content-ID: %s' % header)
        _, request_id = header[1:-1].rsplit('+', 1)

        return urllib_parse.unquote(request_id) 
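The docstring above presumes the '<prefix+id>' shape produced by _ConvertIdToHeader(). A minimal round-trip sketch of that assumption; _make_content_id() is a hypothetical stand-in, not apitools code:

from six.moves import urllib_parse

def _make_content_id(prefix, request_id):
    # Hypothetical counterpart of _ConvertIdToHeader(): percent-quote the
    # id and wrap it as '<prefix+id>' so _ConvertHeaderToId() can undo it.
    return '<%s+%s>' % (prefix, urllib_parse.quote(str(request_id)))

header = _make_content_id('barrier', 'item 1')
print(header)                               # <barrier+item%201>
_, request_id = header[1:-1].rsplit('+', 1)
print(urllib_parse.unquote(request_id))     # item 1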
Example #4
Source File: googlevideo.py    From script.module.resolveurl with GNU General Public License v2.0
def _parse_gdocs(self, html):
        urls = []
        for match in re.finditer(r'\[\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\]', html):
            key, value = match.groups()
            if key == 'fmt_stream_map':
                items = value.split(',')
                for item in items:
                    _source_itag, source_url = item.split('|')
                    if isinstance(source_url, six.text_type) and six.PY2:  # @big change
                        source_url = source_url.encode('utf-8')
                    source_url = source_url.decode('unicode_escape')
                    quality = self.itag_map.get(_source_itag, 'Unknown Quality [%s]' % _source_itag)
                    source_url = urllib_parse.unquote(source_url)
                    urls.append((quality, source_url))
                return urls

        return urls 
Example #5
Source File: browser_search.py    From timesketch with Apache License 2.0
def _decode_url(self, url):
        """Decodes the URL, replaces %XX to their corresponding characters.

        Args:
          url (str): encoded URL.

        Returns:
          str: decoded URL.
        """
        if not url:
            return ''

        # pylint: disable=too-many-function-args
        decoded_url = urlparse.unquote(url)
        if isinstance(decoded_url, six.binary_type):
            try:
                decoded_url = decoded_url.decode('utf-8')
            except UnicodeDecodeError as exception:
                decoded_url = decoded_url.decode('utf-8', errors='replace')
                logging.warning(
                    'Unable to decode URL: {0:s} with error: {1!s}'.format(
                        url, exception))

        return decoded_url 
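The binary_type branch only matters on Python 2, where unquote() of a byte string returns bytes; on Python 3 the result is already text. A standalone illustration of the same pattern (the URL is made up):

import six
from six.moves import urllib_parse as urlparse

encoded_url = 'http://example.com/caf%C3%A9'
decoded_url = urlparse.unquote(encoded_url)
if isinstance(decoded_url, six.binary_type):
    # Python 2: unquote() returned raw UTF-8 bytes, so decode them here.
    decoded_url = decoded_url.decode('utf-8', errors='replace')
print(decoded_url)   # http://example.com/café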
Example #6
Source File: googlevideo.py    From script.module.resolveurl with GNU General Public License v2.0
def __extract_video(self, item):
        sources = []
        for e in item:
            if isinstance(e, dict):
                for key in e:
                    for item2 in e[key]:
                        if isinstance(item2, list):
                            for item3 in item2:
                                if isinstance(item3, list):
                                    for item4 in item3:
                                        if isinstance(item4, six.text_type) and six.PY2:  # @big change
                                            item4 = item4.encode('utf-8')
                                        if isinstance(item4, six.string_types) and six.PY2:  # @big change
                                            item4 = urllib_parse.unquote(item4).decode('unicode_escape')
                                            for match in re.finditer('url=(?P<link>[^&]+).*?&itag=(?P<itag>[^&]+)', item4):
                                                link = match.group('link')
                                                itag = match.group('itag')
                                                quality = self.itag_map.get(itag, 'Unknown Quality [%s]' % itag)
                                                sources.append((quality, link))
                                            if sources:
                                                return sources
        return sources 
Example #7
Source File: veeHD.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        if not self.get_setting('login') == 'true' or not (self.get_setting('username') and self.get_setting('password')):
            raise ResolverError('VeeHD requires a username & password')

        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        # two possible playeriframe's: stream and download
        for match in re.finditer(r'playeriframe.+?src\s*:\s*"([^"]+)', html):
            player_url = 'http://%s%s' % (host, match.group(1))
            html = self.net.http_GET(player_url).content

            # if the player html contains an iframe the iframe url has to be gotten and then the player_url tried again
            r = re.search('<iframe.*?src="([^"]+)', html)
            if r:
                frame_url = 'http://%s%s' % (host, r.group(1))
                self.net.http_GET(frame_url)
                html = self.net.http_GET(player_url).content

            patterns = [r'"video/divx"\s+src="([^"]+)', r'"url"\s*:\s*"([^"]+)', 'href="([^"]+(?:mp4|avi))']
            for pattern in patterns:
                r = re.search(pattern, html)
                if r:
                    stream_url = urllib_parse.unquote(r.group(1))
                    return stream_url

        raise ResolverError('File Not Found or Removed') 
Example #8
Source File: codecs.py    From jaeger-client-python with Apache License 2.0
def extract(self, carrier):
        if not hasattr(carrier, 'items'):
            raise InvalidCarrierException('carrier not a collection')
        trace_id, span_id, parent_id, flags = None, None, None, None
        baggage = None
        debug_id = None
        for key, value in six.iteritems(carrier):
            uc_key = key.lower()
            if uc_key == self.trace_id_header:
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                trace_id, span_id, parent_id, flags = \
                    span_context_from_string(value)
            elif uc_key.startswith(self.baggage_prefix):
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                attr_key = key[self.prefix_length:]
                if baggage is None:
                    baggage = {attr_key.lower(): value}
                else:
                    baggage[attr_key.lower()] = value
            elif uc_key == self.debug_id_header:
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                debug_id = value
            elif uc_key == self.baggage_header:
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                baggage = self._parse_baggage_header(value, baggage)
        if not trace_id or not span_id:
            # reset all IDs
            trace_id, span_id, parent_id, flags = None, None, None, None
        if not trace_id and not debug_id and not baggage:
            return None
        return SpanContext(trace_id=trace_id, span_id=span_id,
                           parent_id=parent_id, flags=flags,
                           baggage=baggage, debug_id=debug_id) 
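The codec only calls unquote() when url_encoding is enabled, in which case every incoming header value is percent-decoded before it is interpreted. A standalone sketch of that decoding step; the 'uber-trace-id' and 'uberctx-' names are the library's usual defaults but are an assumption here, not taken from the snippet:

from six.moves import urllib_parse

# A hypothetical carrier as it might arrive over HTTP with URL-encoded values.
carrier = {
    'uber-trace-id': '463ac35c9f6413ad%3A48485a3953bb6124%3A0%3A1',
    'uberctx-user-name': 'jane%20doe',
}

for key, value in carrier.items():
    # This is the decoding the codec applies when url_encoding is True.
    print(key, '->', urllib_parse.unquote(value))
# uber-trace-id -> 463ac35c9f6413ad:48485a3953bb6124:0:1
# uberctx-user-name -> jane doe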
Example #9
Source File: config.py    From patroni with MIT License
def conninfo_uri_parse(dsn):
    ret = {}
    r = urlparse(dsn)
    if r.username:
        ret['user'] = r.username
    if r.password:
        ret['password'] = r.password
    if r.path[1:]:
        ret['dbname'] = r.path[1:]
    hosts = []
    ports = []
    for netloc in r.netloc.split('@')[-1].split(','):
        host = port = None
        if '[' in netloc and ']' in netloc:
            host = netloc.split(']')[0][1:]
        tmp = netloc.split(':', 1)
        if host is None:
            host = tmp[0]
        if len(tmp) == 2:
            host, port = tmp
        if host is not None:
            hosts.append(host)
        if port is not None:
            ports.append(port)
    if hosts:
        ret['host'] = ','.join(hosts)
    if ports:
        ret['port'] = ','.join(ports)
    ret = {name: unquote(value) for name, value in ret.items()}
    ret.update({name: value for name, value in parse_qsl(r.query)})
    if ret.get('ssl') == 'true':
        del ret['ssl']
        ret['sslmode'] = 'require'
    return ret 
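The unquote() over ret.items() is what turns percent-escaped credentials in the URI back into literal characters. A hypothetical call, with the expected result shown as a comment (assuming the function above is importable as written):

dsn = 'postgresql://alice:p%40ss%2Fword@db1:5432,db2:5433/mydb?connect_timeout=5'
print(conninfo_uri_parse(dsn))
# {'user': 'alice', 'password': 'p@ss/word', 'dbname': 'mydb',
#  'host': 'db1,db2', 'port': '5432,5433', 'connect_timeout': '5'}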
Example #10
Source File: odnoklassniki.py    From Dailyfresh-B2C with Apache License 2.0
def get_user_details(self, response):
        fullname, first_name, last_name = self.get_user_names(
            fullname=unquote(response['name']),
            first_name=unquote(response['first_name']),
            last_name=unquote(response['last_name'])
        )
        return {
            'username': response['uid'],
            'email': '',
            'fullname': fullname,
            'first_name': first_name,
            'last_name': last_name
        } 
Example #11
Source File: odnoklassniki.py    From Dailyfresh-B2C with Apache License 2.0
def get_user_details(self, response):
        """Return user details from Odnoklassniki request"""
        fullname, first_name, last_name = self.get_user_names(
            fullname=unquote(response['name']),
            first_name=unquote(response['first_name']),
            last_name=unquote(response['last_name'])
        )
        return {
            'username': response['uid'],
            'email': response.get('email', ''),
            'fullname': fullname,
            'first_name': first_name,
            'last_name': last_name
        } 
Example #12
Source File: oauth.py    From Dailyfresh-B2C with Apache License 2.0
def auth_url(self):
        """Return redirect url"""
        state = self.get_or_create_state()
        params = self.auth_params(state)
        params.update(self.get_scope_argument())
        params.update(self.auth_extra_arguments())
        params = urlencode(params)
        if not self.REDIRECT_STATE:
            # redirect_uri matching is strictly enforced, so match the
            # providers value exactly.
            params = unquote(params)
        return '{0}?{1}'.format(self.authorization_url(), params) 
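The closing unquote() undoes the percent-encoding that urlencode() applied, so values such as redirect_uri appear in the query string exactly as they were registered with the provider. A standalone illustration with made-up parameters:

from six.moves.urllib_parse import urlencode, unquote

params = urlencode({'redirect_uri': 'https://example.com/complete/', 'state': 'abc123'})
print(params)           # redirect_uri=https%3A%2F%2Fexample.com%2Fcomplete%2F&state=abc123
print(unquote(params))  # redirect_uri=https://example.com/complete/&state=abc123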
Example #13
Source File: mailru.py    From Dailyfresh-B2C with Apache License 2.0
def get_user_details(self, response):
        """Return user details from Mail.ru request"""
        fullname, first_name, last_name = self.get_user_names(
            first_name=unquote(response['first_name']),
            last_name=unquote(response['last_name'])
        )
        return {'username': unquote(response['nick']),
                'email': unquote(response['email']),
                'fullname': fullname,
                'first_name': first_name,
                'last_name': last_name} 
Example #14
Source File: path.py    From vistir with ISC License
def url_to_path(url):
    # type: (str) -> str
    """Convert a valid file url to a local filesystem path.

    Follows logic taken from pip's equivalent function
    """

    assert is_file_url(url), "Only file: urls can be converted to local paths"
    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
    # Netlocs are UNC paths
    if netloc:
        netloc = "\\\\" + netloc

    path = urllib_request.url2pathname(netloc + path)
    return urllib_parse.unquote(path) 
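A hypothetical invocation of the function above on a POSIX system; the netloc branch only comes into play for Windows UNC-style file URLs (file://server/share/...):

# Assumes url_to_path() above is importable as written.
print(url_to_path('file:///tmp/some%20project'))   # /tmp/some project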
Example #15
Source File: cda.py    From script.module.resolveurl with GNU General Public License v2.0
def cda_decode(self, a):
        a = a.replace("_XDDD", "")
        a = a.replace("_CDA", "")
        a = a.replace("_ADC", "")
        a = a.replace("_CXD", "")
        a = a.replace("_QWE", "")
        a = a.replace("_Q5", "")
        a = urllib_parse.unquote(a)
        a = ''.join([chr(33 + (ord(char) + 14) % 94) if 32 < ord(char) < 127 else char for char in a])
        a = a.replace(".cda.mp4", "")
        a = a.replace(".2cda.pl", ".cda.pl")
        a = a.replace(".3cda.pl", ".cda.pl")
        return "https://{0}.mp4".format(a) 
Example #16
Source File: youtube.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        try:
            web_url = self.get_url(host, media_id)
            html = self.net.http_GET(web_url, headers=self.headers).content
            stream_map = urllib_parse.unquote(re.findall('url_encoded_fmt_stream_map=([^&]+)', html)[0])
            streams = stream_map.split(',')
            sources = []
            streams_mp4 = [item for item in streams if 'video%2Fmp4' in item]
            for stream in streams_mp4:
                quality = re.findall('quality=([^&]+)', stream)[0]
                url = re.findall('url=([^&]+)', stream)[0]
                sources.append((quality, urllib_parse.unquote(url)))
            if sources:
                return helpers.pick_source(sources)

        except:
            if youtube_resolver is None:
                return 'plugin://plugin.video.youtube/play/?video_id=' + media_id
            else:
                streams = youtube_resolver.resolve(media_id)
                streams_no_dash = [item for item in streams if item['container'] != 'mpd']
                stream_tuples = [(item['title'], item['url']) for item in streams_no_dash]
                if stream_tuples:
                    return helpers.pick_source(stream_tuples)

        raise ResolverError('Video not found') 
Example #17
Source File: facebook.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        if html.find('Video Unavailable') >= 0:
            err_message = 'The requested video was not found.'
            raise ResolverError(err_message)

        videoUrl = re.compile('"(?:hd_src|sd_src)":"(.+?)"').findall(html)
        videoUrl = [urllib_parse.unquote(i.replace('\\u0025', '%')) for i in videoUrl]
        videoUrl = [i.replace('\\', '') for i in videoUrl]

        vUrl = ''
        vUrlsCount = len(videoUrl)
        if vUrlsCount > 0:
            q = self.get_setting('quality')
            if q == '0':
                # Highest Quality
                vUrl = videoUrl[0]
            else:
                # Standard Quality
                vUrl = videoUrl[vUrlsCount - 1]

            return vUrl

        else:
            raise ResolverError('No playable video found.') 
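The '\u0025' substitution restores the JSON-escaped percent signs so that unquote() can then resolve the remaining %XX escapes, and the final replace strips the leftover backslashes. A standalone trace of those steps on a made-up escaped URL:

from six.moves.urllib_parse import unquote

# A made-up URL escaped the way it appears inside the page's JSON.
raw = 'https:\\/\\/video.example.com\\/v\\/abc.mp4?oh\\u00253Dtoken\\u002526oe\\u00253D12AB'
step1 = raw.replace('\\u0025', '%')   # \u0025 -> literal '%'
step2 = unquote(step1)                # resolve %3D -> '=' and %26 -> '&'
step3 = step2.replace('\\', '')       # drop the escaped slashes
print(step3)   # https://video.example.com/v/abc.mp4?oh=token&oe=12AB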
Example #18
Source File: __init__.py    From image_process with GNU Affero General Public License v3.0
def process_image(image, settings):
    # Set default value for 'IMAGE_PROCESS_FORCE'.
    if "IMAGE_PROCESS_FORCE" not in settings:
        settings["IMAGE_PROCESS_FORCE"] = False

    # remove URL encoding to get to physical filenames
    image = list(image)
    image[0] = unquote(image[0])
    image[1] = unquote(image[1])
    # image[2] is the transformation

    path, _ = os.path.split(image[1])
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno == 17:
            # Already exists
            pass

    # If original image is older than existing derivative, skip
    # processing to save time, unless user explicitly forced
    # image generation.
    if (
        settings["IMAGE_PROCESS_FORCE"]
        or not os.path.exists(image[1])
        or os.path.getmtime(image[0]) > os.path.getmtime(image[1])
    ):

        i = Image.open(image[0])

        for step in image[2]:
            if hasattr(step, "__call__"):
                i = step(i)
            else:
                elems = step.split(" ")
                i = basic_ops[elems[0]](i, *(elems[1:]))

        # `save_all=True`  will allow saving multi-page (aka animated) GIF's
        # however, turning it on seems to break PNG support, and doesn't seem
        # to work on GIF's either...
        i.save(image[1], progressive=True) 
Example #19
Source File: vivosx.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        headers = {'User-Agent': common.RAND_UA,
                   'Referer': web_url}
        html = self.net.http_GET(web_url, headers=headers).content

        r = re.search(r'''InitializeStream.+?source:\s*['"]([^'"]+)''', html, re.DOTALL)

        if r:
            return _rot47(urllib_parse.unquote(r.group(1))) + helpers.append_headers(headers)

        raise ResolverError('Video cannot be located.') 
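The _rot47 helper is not included in this excerpt. A standard ROT47 implementation is sketched below; that the resolver's own helper matches it exactly is an assumption:

def _rot47(text):
    # Standard ROT47: rotate the printable ASCII range 33..126 by 47.
    # Assumed equivalent to the helper used above; not copied from the resolver.
    return ''.join(
        chr(33 + (ord(c) - 33 + 47) % 94) if 33 <= ord(c) <= 126 else c
        for c in text
    )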
Example #20
Source File: etree.py    From wextracto with BSD 3-Clause "New" or "Revised" License
def get_base_url_from_root(root):
    if root.base_url:
        # see :func:`.parse` for why we need to unquote
        base_url = unquote_base_url(root.base_url)
    else:
        base_url = root.base_url
    return reduce(urljoin, base_href(root)[:1], base_url) 
Example #21
Source File: etree.py    From wextracto with BSD 3-Clause "New" or "Revised" License
def parse(src):
    """ Returns an element tree create by `LXML <http://lxml.de/>`_.
       :param src: A readable object such as a :class:`wex.response.Response`.
    """

    if not hasattr(src, 'read'):
        return src

    etree = _ElementTree()
    try:
        stream = HTMLStream(src)
        # Sometimes we get URLs containing characters that aren't
        # acceptable to lxml (e.g. "http:/foo.com/bar?this=array[]").
        # When this happens lxml will quote the whole URL.
        # We don't want to have to check for this so we just always
        # quote it here and then unquote it in the `base_url` function.
        quoted_base_url = quote_base_url(src.url) if src.url else src.url
        while True:
            try:
                fp = replace_invalid_ncr(stream)
                # fp is a Unicode stream
                # The lxml FAQ tells us that it is inefficient to do this
                # http://lxml.de/FAQ.html#can-lxml-parse-from-file-objects-opened-in-unicode-text-mode
                # but actually it seems just fine as long as you tell the parser to use 'utf-8'!?
                parser = HTMLParser(encoding='utf-8')
                etree.parse(fp, parser=parser, base_url=quoted_base_url)
                break
            except UnicodeDecodeError as exc:
                stream.next_encoding()
    except IOError as exc:
        logger = logging.getLogger(__name__)
        logger.warning("IOError parsing %s (%s)", src.url, exc)

    root = etree.getroot()
    if root is None:
        etree._setroot(UNPARSEABLE)

    return etree 
Example #22
Source File: etree.py    From wextracto with BSD 3-Clause "New" or "Revised" License
def unquote_base_url(quoted):
        assert isinstance(quoted, unicode)
        quoted = quoted.encode('ascii')
        unquoted = unquote(quoted)
        return unquoted.decode('utf-8') 
Example #23
Source File: cda.py    From script.module.urlresolver with GNU General Public License v2.0
def cda_decode(self, a):
        a = a.replace("_XDDD", "")
        a = a.replace("_CDA", "")
        a = a.replace("_ADC", "")
        a = a.replace("_CXD", "")
        a = a.replace("_QWE", "")
        a = a.replace("_Q5", "")
        a = urllib_parse.unquote(a)
        a = ''.join([chr(33 + (ord(char) + 14) % 94) if 32 < ord(char) < 127 else char for char in a])
        a = a.replace(".cda.mp4", "")
        a = a.replace(".2cda.pl", ".cda.pl")
        a = a.replace(".3cda.pl", ".cda.pl")
        return "https://{0}.mp4".format(a) 
Example #24
Source File: youtube.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        try:
            web_url = self.get_url(host, media_id)
            html = self.net.http_GET(web_url, headers=self.headers).content
            stream_map = urllib_parse.unquote(re.findall('url_encoded_fmt_stream_map=([^&]+)', html)[0])
            streams = stream_map.split(',')
            sources = []
            streams_mp4 = [item for item in streams if 'video%2Fmp4' in item]
            for stream in streams_mp4:
                quality = re.findall('quality=([^&]+)', stream)[0]
                url = re.findall('url=([^&]+)', stream)[0]
                sources.append((quality, urllib_parse.unquote(url)))
            if sources:
                return helpers.pick_source(sources)

        except:
            if youtube_resolver is None:
                return 'plugin://plugin.video.youtube/play/?video_id=' + media_id
            else:
                streams = youtube_resolver.resolve(media_id)
                streams_no_dash = [item for item in streams if item['container'] != 'mpd']
                stream_tuples = [(item['title'], item['url']) for item in streams_no_dash]
                if stream_tuples:
                    return helpers.pick_source(stream_tuples)

        raise ResolverError('Video not found') 
Example #25
Source File: facebook.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        if html.find('Video Unavailable') >= 0:
            err_message = 'The requested video was not found.'
            raise ResolverError(err_message)

        videoUrl = re.compile('"(?:hd_src|sd_src)":"(.+?)"').findall(html)
        videoUrl = [urllib_parse.unquote(i.replace('\\u0025', '%')) for i in videoUrl]
        videoUrl = [i.replace('\\', '') for i in videoUrl]

        vUrl = ''
        vUrlsCount = len(videoUrl)
        if vUrlsCount > 0:
            q = self.get_setting('quality')
            if q == '0':
                # Highest Quality
                vUrl = videoUrl[0]
            else:
                # Standard Quality
                vUrl = videoUrl[vUrlsCount - 1]

            return vUrl

        else:
            raise ResolverError('No playable video found.') 
Example #26
Source File: veeHD.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        if not self.get_setting('login') == 'true' or not (self.get_setting('username') and self.get_setting('password')):
            raise ResolverError('VeeHD requires a username & password')

        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        # two possible playeriframe's: stream and download
        for match in re.finditer(r'playeriframe.+?src\s*:\s*"([^"]+)', html):
            player_url = 'http://%s%s' % (host, match.group(1))
            html = self.net.http_GET(player_url).content

            # if the player html contains an iframe the iframe url has to be gotten and then the player_url tried again
            r = re.search('<iframe.*?src="([^"]+)', html)
            if r:
                frame_url = 'http://%s%s' % (host, r.group(1))
                self.net.http_GET(frame_url)
                html = self.net.http_GET(player_url).content

            patterns = [r'"video/divx"\s+src="([^"]+)', r'"url"\s*:\s*"([^"]+)', 'href="([^"]+(?:mp4|avi))']
            for pattern in patterns:
                r = re.search(pattern, html)
                if r:
                    stream_url = urllib_parse.unquote(r.group(1))
                    return stream_url

        raise ResolverError('File Not Found or Removed') 
Example #27
Source File: vivosx.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        headers = {'User-Agent': common.RAND_UA,
                   'Referer': web_url}
        html = self.net.http_GET(web_url, headers=headers).content

        r = re.search(r'''InitializeStream.+?source:\s*['"]([^'"]+)''', html, re.DOTALL)

        if r:
            return _rot47(urllib_parse.unquote(r.group(1))) + helpers.append_headers(headers)

        raise ResolverError('Video cannot be located.') 
Example #28
Source File: path.py    From pipenv with MIT License
def url_to_path(url):
    # type: (str) -> str
    """Convert a valid file url to a local filesystem path.

    Follows logic taken from pip's equivalent function
    """

    assert is_file_url(url), "Only file: urls can be converted to local paths"
    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
    # Netlocs are UNC paths
    if netloc:
        netloc = "\\\\" + netloc

    path = urllib_request.url2pathname(netloc + path)
    return urllib_parse.unquote(path)