Python six.moves.urllib_parse.unquote() Examples

The following are 28 code examples of six.moves.urllib_parse.unquote(), taken from open-source projects. The source file, originating project, and license are noted above each example.
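As a quick orientation, unquote() replaces %XX escape sequences with the characters they encode. A minimal standalone sketch of its behaviour under six (the inputs are made up for illustration):

from six.moves.urllib_parse import unquote

# '%20' becomes a space, '%2F' becomes '/', and so on.
print(unquote('foo%20bar%2Fbaz'))   # foo bar/baz

# On Python 3, percent-escaped UTF-8 is decoded to text by default;
# on Python 2 the same call returns the raw UTF-8 bytes, which is why
# several examples below decode the result explicitly.
print(unquote('caf%C3%A9'))         # café on Python 3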
Example #1
Source File: googlevideo.py    From script.module.urlresolver with GNU General Public License v2.0
def __extract_video(self, item):
        sources = []
        for e in item:
            if isinstance(e, dict):
                for key in e:
                    for item2 in e[key]:
                        if isinstance(item2, list):
                            for item3 in item2:
                                if isinstance(item3, list):
                                    for item4 in item3:
                                        if isinstance(item4, six.text_type) and six.PY2:  # @big change
                                            item4 = item4.encode('utf-8')
                                        if isinstance(item4, six.string_types) and six.PY2:  # @big change
                                            item4 = urllib_parse.unquote(item4).decode('unicode_escape')
                                            for match in re.finditer('url=(?P<link>[^&]+).*?&itag=(?P<itag>[^&]+)', item4):
                                                link = match.group('link')
                                                itag = match.group('itag')
                                                quality = self.itag_map.get(itag, 'Unknown Quality [%s]' % itag)
                                                sources.append((quality, link))
                                            if sources:
                                                return sources
        return sources 
Example #2
Source File: googlevideo.py    From script.module.urlresolver with GNU General Public License v2.0
def _parse_gdocs(self, html):
        urls = []
        for match in re.finditer(r'\[\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\]', html):
            key, value = match.groups()
            if key == 'fmt_stream_map':
                items = value.split(',')
                for item in items:
                    _source_itag, source_url = item.split('|')
                    if isinstance(source_url, six.text_type) and six.PY2:  # @big change
                        source_url = source_url.encode('utf-8')
                    source_url = source_url.decode('unicode_escape')
                    quality = self.itag_map.get(_source_itag, 'Unknown Quality [%s]' % _source_itag)
                    source_url = urllib_parse.unquote(source_url)
                    urls.append((quality, source_url))
                return urls

        return urls 
Example #3
Source File: batch.py    From apitools with Apache License 2.0
def _ConvertHeaderToId(header):
        """Convert a Content-ID header value to an id.

        Presumes the Content-ID header conforms to the format that
        _ConvertIdToHeader() returns.

        Args:
          header: A string indicating the Content-ID header value.

        Returns:
          The extracted id value.

        Raises:
          BatchError if the header is not in the expected format.
        """
        if not (header.startswith('<') or header.endswith('>')):
            raise exceptions.BatchError(
                'Invalid value for Content-ID: %s' % header)
        if '+' not in header:
            raise exceptions.BatchError(
                'Invalid value for Content-ID: %s' % header)
        _, request_id = header[1:-1].rsplit('+', 1)

        return urllib_parse.unquote(request_id) 
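The docstring above presumes the '<prefix+id>' shape produced by _ConvertIdToHeader(). A minimal round-trip sketch of that assumption; _make_content_id() is a hypothetical stand-in, not apitools code:

from six.moves import urllib_parse

def _make_content_id(prefix, request_id):
    # Hypothetical counterpart of _ConvertIdToHeader(): percent-quote the
    # id and wrap it as '<prefix+id>' so _ConvertHeaderToId() can undo it.
    return '<%s+%s>' % (prefix, urllib_parse.quote(str(request_id)))

header = _make_content_id('barrier', 'item 1')
print(header)                               # <barrier+item%201>
_, request_id = header[1:-1].rsplit('+', 1)
print(urllib_parse.unquote(request_id))     # item 1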
Example #4
Source File: googlevideo.py    From script.module.resolveurl with GNU General Public License v2.0
def _parse_gdocs(self, html):
        urls = []
        for match in re.finditer(r'\[\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\]', html):
            key, value = match.groups()
            if key == 'fmt_stream_map':
                items = value.split(',')
                for item in items:
                    _source_itag, source_url = item.split('|')
                    if isinstance(source_url, six.text_type) and six.PY2:  # @big change
                        source_url = source_url.encode('utf-8')
                    source_url = source_url.decode('unicode_escape')
                    quality = self.itag_map.get(_source_itag, 'Unknown Quality [%s]' % _source_itag)
                    source_url = urllib_parse.unquote(source_url)
                    urls.append((quality, source_url))
                return urls

        return urls 
Example #5
Source File: browser_search.py    From timesketch with Apache License 2.0
def _decode_url(self, url):
        """Decodes the URL, replaces %XX to their corresponding characters.

        Args:
          url (str): encoded URL.

        Returns:
          str: decoded URL.
        """
        if not url:
            return ''

        # pylint: disable=too-many-function-args
        decoded_url = urlparse.unquote(url)
        if isinstance(decoded_url, six.binary_type):
            try:
                decoded_url = decoded_url.decode('utf-8')
            except UnicodeDecodeError as exception:
                decoded_url = decoded_url.decode('utf-8', errors='replace')
                logging.warning(
                    'Unable to decode URL: {0:s} with error: {1!s}'.format(
                        url, exception))

        return decoded_url 
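The binary_type branch only matters on Python 2, where unquote() of a byte string returns bytes; on Python 3 the result is already text. A standalone illustration of the same pattern (the URL is made up):

import six
from six.moves import urllib_parse as urlparse

encoded_url = 'http://example.com/caf%C3%A9'
decoded_url = urlparse.unquote(encoded_url)
if isinstance(decoded_url, six.binary_type):
    # Python 2: unquote() returned raw UTF-8 bytes, so decode them here.
    decoded_url = decoded_url.decode('utf-8', errors='replace')
print(decoded_url)   # http://example.com/café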
Example #6
Source File: googlevideo.py    From script.module.resolveurl with GNU General Public License v2.0
def __extract_video(self, item):
        sources = []
        for e in item:
            if isinstance(e, dict):
                for key in e:
                    for item2 in e[key]:
                        if isinstance(item2, list):
                            for item3 in item2:
                                if isinstance(item3, list):
                                    for item4 in item3:
                                        if isinstance(item4, six.text_type) and six.PY2:  # @big change
                                            item4 = item4.encode('utf-8')
                                        if isinstance(item4, six.string_types) and six.PY2:  # @big change
                                            item4 = urllib_parse.unquote(item4).decode('unicode_escape')
                                            for match in re.finditer('url=(?P<link>[^&]+).*?&itag=(?P<itag>[^&]+)', item4):
                                                link = match.group('link')
                                                itag = match.group('itag')
                                                quality = self.itag_map.get(itag, 'Unknown Quality [%s]' % itag)
                                                sources.append((quality, link))
                                            if sources:
                                                return sources
        return sources 
Example #7
Source File: veeHD.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        if not self.get_setting('login') == 'true' or not (self.get_setting('username') and self.get_setting('password')):
            raise ResolverError('VeeHD requires a username & password')

        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        # two possible playeriframe's: stream and download
        for match in re.finditer(r'playeriframe.+?src\s*:\s*"([^"]+)', html):
            player_url = 'http://%s%s' % (host, match.group(1))
            html = self.net.http_GET(player_url).content

            # if the player html contains an iframe the iframe url has to be gotten and then the player_url tried again
            r = re.search('<iframe.*?src="([^"]+)', html)
            if r:
                frame_url = 'http://%s%s' % (host, r.group(1))
                self.net.http_GET(frame_url)
                html = self.net.http_GET(player_url).content

            patterns = [r'"video/divx"\s+src="([^"]+)', r'"url"\s*:\s*"([^"]+)', 'href="([^"]+(?:mp4|avi))']
            for pattern in patterns:
                r = re.search(pattern, html)
                if r:
                    stream_url = urllib_parse.unquote(r.group(1))
                    return stream_url

        raise ResolverError('File Not Found or Removed') 
Example #8
Source File: codecs.py    From jaeger-client-python with Apache License 2.0
def extract(self, carrier):
        if not hasattr(carrier, 'items'):
            raise InvalidCarrierException('carrier not a collection')
        trace_id, span_id, parent_id, flags = None, None, None, None
        baggage = None
        debug_id = None
        for key, value in six.iteritems(carrier):
            uc_key = key.lower()
            if uc_key == self.trace_id_header:
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                trace_id, span_id, parent_id, flags = \
                    span_context_from_string(value)
            elif uc_key.startswith(self.baggage_prefix):
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                attr_key = key[self.prefix_length:]
                if baggage is None:
                    baggage = {attr_key.lower(): value}
                else:
                    baggage[attr_key.lower()] = value
            elif uc_key == self.debug_id_header:
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                debug_id = value
            elif uc_key == self.baggage_header:
                if self.url_encoding:
                    value = urllib_parse.unquote(value)
                baggage = self._parse_baggage_header(value, baggage)
        if not trace_id or not span_id:
            # reset all IDs
            trace_id, span_id, parent_id, flags = None, None, None, None
        if not trace_id and not debug_id and not baggage:
            return None
        return SpanContext(trace_id=trace_id, span_id=span_id,
                           parent_id=parent_id, flags=flags,
                           baggage=baggage, debug_id=debug_id) 
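The codec only calls unquote() when url_encoding is enabled, in which case every incoming header value is percent-decoded before it is interpreted. A standalone sketch of that decoding step; the 'uber-trace-id' and 'uberctx-' names are the library's usual defaults but are an assumption here, not taken from the snippet:

from six.moves import urllib_parse

# A hypothetical carrier as it might arrive over HTTP with URL-encoded values.
carrier = {
    'uber-trace-id': '463ac35c9f6413ad%3A48485a3953bb6124%3A0%3A1',
    'uberctx-user-name': 'jane%20doe',
}

for key, value in carrier.items():
    # This is the decoding the codec applies when url_encoding is True.
    print(key, '->', urllib_parse.unquote(value))
# uber-trace-id -> 463ac35c9f6413ad:48485a3953bb6124:0:1
# uberctx-user-name -> jane doe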
Example #9
Source File: config.py    From patroni with MIT License
def conninfo_uri_parse(dsn):
    ret = {}
    r = urlparse(dsn)
    if r.username:
        ret['user'] = r.username
    if r.password:
        ret['password'] = r.password
    if r.path[1:]:
        ret['dbname'] = r.path[1:]
    hosts = []
    ports = []
    for netloc in r.netloc.split('@')[-1].split(','):
        host = port = None
        if '[' in netloc and ']' in netloc:
            host = netloc.split(']')[0][1:]
        tmp = netloc.split(':', 1)
        if host is None:
            host = tmp[0]
        if len(tmp) == 2:
            host, port = tmp
        if host is not None:
            hosts.append(host)
        if port is not None:
            ports.append(port)
    if hosts:
        ret['host'] = ','.join(hosts)
    if ports:
        ret['port'] = ','.join(ports)
    ret = {name: unquote(value) for name, value in ret.items()}
    ret.update({name: value for name, value in parse_qsl(r.query)})
    if ret.get('ssl') == 'true':
        del ret['ssl']
        ret['sslmode'] = 'require'
    return ret 
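The unquote() over ret.items() is what turns percent-escaped credentials in the URI back into literal characters. A hypothetical call, with the expected result shown as a comment (assuming the function above is importable as written):

dsn = 'postgresql://alice:p%40ss%2Fword@db1:5432,db2:5433/mydb?connect_timeout=5'
print(conninfo_uri_parse(dsn))
# {'user': 'alice', 'password': 'p@ss/word', 'dbname': 'mydb',
#  'host': 'db1,db2', 'port': '5432,5433', 'connect_timeout': '5'}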
Example #10
Source File: odnoklassniki.py    From Dailyfresh-B2C with Apache License 2.0
def get_user_details(self, response):
        fullname, first_name, last_name = self.get_user_names(
            fullname=unquote(response['name']),
            first_name=unquote(response['first_name']),
            last_name=unquote(response['last_name'])
        )
        return {
            'username': response['uid'],
            'email': '',
            'fullname': fullname,
            'first_name': first_name,
            'last_name': last_name
        } 
Example #11
Source File: odnoklassniki.py    From Dailyfresh-B2C with Apache License 2.0
def get_user_details(self, response):
        """Return user details from Odnoklassniki request"""
        fullname, first_name, last_name = self.get_user_names(
            fullname=unquote(response['name']),
            first_name=unquote(response['first_name']),
            last_name=unquote(response['last_name'])
        )
        return {
            'username': response['uid'],
            'email': response.get('email', ''),
            'fullname': fullname,
            'first_name': first_name,
            'last_name': last_name
        } 
Example #12
Source File: oauth.py    From Dailyfresh-B2C with Apache License 2.0
def auth_url(self):
        """Return redirect url"""
        state = self.get_or_create_state()
        params = self.auth_params(state)
        params.update(self.get_scope_argument())
        params.update(self.auth_extra_arguments())
        params = urlencode(params)
        if not self.REDIRECT_STATE:
            # redirect_uri matching is strictly enforced, so match the
            # providers value exactly.
            params = unquote(params)
        return '{0}?{1}'.format(self.authorization_url(), params) 
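The closing unquote() undoes the percent-encoding that urlencode() applied, so values such as redirect_uri appear in the query string exactly as they were registered with the provider. A standalone illustration with made-up parameters:

from six.moves.urllib_parse import urlencode, unquote

params = urlencode({'redirect_uri': 'https://example.com/complete/', 'state': 'abc123'})
print(params)           # redirect_uri=https%3A%2F%2Fexample.com%2Fcomplete%2F&state=abc123
print(unquote(params))  # redirect_uri=https://example.com/complete/&state=abc123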
Example #13
Source File: mailru.py    From Dailyfresh-B2C with Apache License 2.0
def get_user_details(self, response):
        """Return user details from Mail.ru request"""
        fullname, first_name, last_name = self.get_user_names(
            first_name=unquote(response['first_name']),
            last_name=unquote(response['last_name'])
        )
        return {'username': unquote(response['nick']),
                'email': unquote(response['email']),
                'fullname': fullname,
                'first_name': first_name,
                'last_name': last_name} 
Example #14
Source File: path.py    From vistir with ISC License
def url_to_path(url):
    # type: (str) -> str
    """Convert a valid file url to a local filesystem path.

    Follows logic taken from pip's equivalent function
    """

    assert is_file_url(url), "Only file: urls can be converted to local paths"
    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
    # Netlocs are UNC paths
    if netloc:
        netloc = "\\\\" + netloc

    path = urllib_request.url2pathname(netloc + path)
    return urllib_parse.unquote(path) 
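A hypothetical invocation of the function above on a POSIX system; the netloc branch only comes into play for Windows UNC-style file URLs (file://server/share/...):

# Assumes url_to_path() above is importable as written.
print(url_to_path('file:///tmp/some%20project'))   # /tmp/some project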
Example #15
Source File: cda.py    From script.module.resolveurl with GNU General Public License v2.0
def cda_decode(self, a):
        a = a.replace("_XDDD", "")
        a = a.replace("_CDA", "")
        a = a.replace("_ADC", "")
        a = a.replace("_CXD", "")
        a = a.replace("_QWE", "")
        a = a.replace("_Q5", "")
        a = urllib_parse.unquote(a)
        a = ''.join([chr(33 + (ord(char) + 14) % 94) if 32 < ord(char) < 127 else char for char in a])
        a = a.replace(".cda.mp4", "")
        a = a.replace(".2cda.pl", ".cda.pl")
        a = a.replace(".3cda.pl", ".cda.pl")
        return "https://{0}.mp4".format(a) 
Example #16
Source File: youtube.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        try:
            web_url = self.get_url(host, media_id)
            html = self.net.http_GET(web_url, headers=self.headers).content
            stream_map = urllib_parse.unquote(re.findall('url_encoded_fmt_stream_map=([^&]+)', html)[0])
            streams = stream_map.split(',')
            sources = []
            streams_mp4 = [item for item in streams if 'video%2Fmp4' in item]
            for stream in streams_mp4:
                quality = re.findall('quality=([^&]+)', stream)[0]
                url = re.findall('url=([^&]+)', stream)[0]
                sources.append((quality, urllib_parse.unquote(url)))
            if sources:
                return helpers.pick_source(sources)

        except:
            if youtube_resolver is None:
                return 'plugin://plugin.video.youtube/play/?video_id=' + media_id
            else:
                streams = youtube_resolver.resolve(media_id)
                streams_no_dash = [item for item in streams if item['container'] != 'mpd']
                stream_tuples = [(item['title'], item['url']) for item in streams_no_dash]
                if stream_tuples:
                    return helpers.pick_source(stream_tuples)

        raise ResolverError('Video not found') 
Example #17
Source File: facebook.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        if html.find('Video Unavailable') >= 0:
            err_message = 'The requested video was not found.'
            raise ResolverError(err_message)

        videoUrl = re.compile('"(?:hd_src|sd_src)":"(.+?)"').findall(html)
        videoUrl = [urllib_parse.unquote(i.replace('\\u0025', '%')) for i in videoUrl]
        videoUrl = [i.replace('\\', '') for i in videoUrl]

        vUrl = ''
        vUrlsCount = len(videoUrl)
        if vUrlsCount > 0:
            q = self.get_setting('quality')
            if q == '0':
                # Highest Quality
                vUrl = videoUrl[0]
            else:
                # Standard Quality
                vUrl = videoUrl[vUrlsCount - 1]

            return vUrl

        else:
            raise ResolverError('No playable video found.') 
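The '\u0025' substitution restores the JSON-escaped percent signs so that unquote() can then resolve the remaining %XX escapes, and the final replace strips the leftover backslashes. A standalone trace of those steps on a made-up escaped URL:

from six.moves.urllib_parse import unquote

# A made-up URL escaped the way it appears inside the page's JSON.
raw = 'https:\\/\\/video.example.com\\/v\\/abc.mp4?oh\\u00253Dtoken\\u002526oe\\u00253D12AB'
step1 = raw.replace('\\u0025', '%')   # \u0025 -> literal '%'
step2 = unquote(step1)                # resolve %3D -> '=' and %26 -> '&'
step3 = step2.replace('\\', '')       # drop the escaped slashes
print(step3)   # https://video.example.com/v/abc.mp4?oh=token&oe=12AB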
Example #18
Source File: __init__.py    From image_process with GNU Affero General Public License v3.0
def process_image(image, settings):
    # Set default value for 'IMAGE_PROCESS_FORCE'.
    if "IMAGE_PROCESS_FORCE" not in settings:
        settings["IMAGE_PROCESS_FORCE"] = False

    # remove URL encoding to get to physical filenames
    image = list(image)
    image[0] = unquote(image[0])
    image[1] = unquote(image[1])
    # image[2] is the transformation

    path, _ = os.path.split(image[1])
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno == 17:
            # Already exists
            pass

    # If original image is older than existing derivative, skip
    # processing to save time, unless user explicitly forced
    # image generation.
    if (
        settings["IMAGE_PROCESS_FORCE"]
        or not os.path.exists(image[1])
        or os.path.getmtime(image[0]) > os.path.getmtime(image[1])
    ):

        i = Image.open(image[0])

        for step in image[2]:
            if hasattr(step, "__call__"):
                i = step(i)
            else:
                elems = step.split(" ")
                i = basic_ops[elems[0]](i, *(elems[1:]))

        # `save_all=True`  will allow saving multi-page (aka animated) GIF's
        # however, turning it on seems to break PNG support, and doesn't seem
        # to work on GIF's either...
        i.save(image[1], progressive=True) 
Example #19
Source File: vivosx.py    From script.module.resolveurl with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        headers = {'User-Agent': common.RAND_UA,
                   'Referer': web_url}
        html = self.net.http_GET(web_url, headers=headers).content

        r = re.search(r'''InitializeStream.+?source:\s*['"]([^'"]+)''', html, re.DOTALL)

        if r:
            return _rot47(urllib_parse.unquote(r.group(1))) + helpers.append_headers(headers)

        raise ResolverError('Video cannot be located.') 
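The _rot47 helper is not included in this excerpt. A standard ROT47 implementation is sketched below; that the resolver's own helper matches it exactly is an assumption:

def _rot47(text):
    # Standard ROT47: rotate the printable ASCII range 33..126 by 47.
    # Assumed equivalent to the helper used above; not copied from the resolver.
    return ''.join(
        chr(33 + (ord(c) - 33 + 47) % 94) if 33 <= ord(c) <= 126 else c
        for c in text
    )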
Example #20
Source File: etree.py    From wextracto with BSD 3-Clause "New" or "Revised" License
def get_base_url_from_root(root):
    if root.base_url:
        # see :func:`.parse` for why we need to unquote
        base_url = unquote_base_url(root.base_url)
    else:
        base_url = root.base_url
    return reduce(urljoin, base_href(root)[:1], base_url) 
Example #21
Source File: etree.py    From wextracto with BSD 3-Clause "New" or "Revised" License
def parse(src):
    """ Returns an element tree create by `LXML <http://lxml.de/>`_.
       :param src: A readable object such as a :class:`wex.response.Response`.
    """

    if not hasattr(src, 'read'):
        return src

    etree = _ElementTree()
    try:
        stream = HTMLStream(src)
        # Sometimes we get URLs containing characters that aren't
        # acceptable to lxml (e.g. "http:/foo.com/bar?this=array[]").
        # When this happens lxml will quote the whole URL.
        # We don't want to have to check for this so we just always
        # quote it here and then unquote it in the `base_url` function.
        quoted_base_url = quote_base_url(src.url) if src.url else src.url
        while True:
            try:
                fp = replace_invalid_ncr(stream)
                # fp is a Unicode stream
                # The lxml FAQ tells us that it is inefficient to do this
                # http://lxml.de/FAQ.html#can-lxml-parse-from-file-objects-opened-in-unicode-text-mode
                # but actually it seems just fine as long as you tell the parser to use 'utf-8'!?
                parser = HTMLParser(encoding='utf-8')
                etree.parse(fp, parser=parser, base_url=quoted_base_url)
                break
            except UnicodeDecodeError as exc:
                stream.next_encoding()
    except IOError as exc:
        logger = logging.getLogger(__name__)
        logger.warning("IOError parsing %s (%s)", src.url, exc)

    root = etree.getroot()
    if root is None:
        etree._setroot(UNPARSEABLE)

    return etree 
Example #22
Source File: etree.py    From wextracto with BSD 3-Clause "New" or "Revised" License
def unquote_base_url(quoted):
        assert isinstance(quoted, unicode)
        quoted = quoted.encode('ascii')
        unquoted = unquote(quoted)
        return unquoted.decode('utf-8') 
Example #23
Source File: cda.py    From script.module.urlresolver with GNU General Public License v2.0
def cda_decode(self, a):
        a = a.replace("_XDDD", "")
        a = a.replace("_CDA", "")
        a = a.replace("_ADC", "")
        a = a.replace("_CXD", "")
        a = a.replace("_QWE", "")
        a = a.replace("_Q5", "")
        a = urllib_parse.unquote(a)
        a = ''.join([chr(33 + (ord(char) + 14) % 94) if 32 < ord(char) < 127 else char for char in a])
        a = a.replace(".cda.mp4", "")
        a = a.replace(".2cda.pl", ".cda.pl")
        a = a.replace(".3cda.pl", ".cda.pl")
        return "https://{0}.mp4".format(a) 
Example #24
Source File: youtube.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        try:
            web_url = self.get_url(host, media_id)
            html = self.net.http_GET(web_url, headers=self.headers).content
            stream_map = urllib_parse.unquote(re.findall('url_encoded_fmt_stream_map=([^&]+)', html)[0])
            streams = stream_map.split(',')
            sources = []
            streams_mp4 = [item for item in streams if 'video%2Fmp4' in item]
            for stream in streams_mp4:
                quality = re.findall('quality=([^&]+)', stream)[0]
                url = re.findall('url=([^&]+)', stream)[0]
                sources.append((quality, urllib_parse.unquote(url)))
            if sources:
                return helpers.pick_source(sources)

        except:
            if youtube_resolver is None:
                return 'plugin://plugin.video.youtube/play/?video_id=' + media_id
            else:
                streams = youtube_resolver.resolve(media_id)
                streams_no_dash = [item for item in streams if item['container'] != 'mpd']
                stream_tuples = [(item['title'], item['url']) for item in streams_no_dash]
                if stream_tuples:
                    return helpers.pick_source(stream_tuples)

        raise ResolverError('Video not found') 
Example #25
Source File: facebook.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        if html.find('Video Unavailable') >= 0:
            err_message = 'The requested video was not found.'
            raise ResolverError(err_message)

        videoUrl = re.compile('"(?:hd_src|sd_src)":"(.+?)"').findall(html)
        videoUrl = [urllib_parse.unquote(i.replace('\\u0025', '%')) for i in videoUrl]
        videoUrl = [i.replace('\\', '') for i in videoUrl]

        vUrl = ''
        vUrlsCount = len(videoUrl)
        if vUrlsCount > 0:
            q = self.get_setting('quality')
            if q == '0':
                # Highest Quality
                vUrl = videoUrl[0]
            else:
                # Standard Quality
                vUrl = videoUrl[vUrlsCount - 1]

            return vUrl

        else:
            raise ResolverError('No playable video found.') 
Example #26
Source File: veeHD.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        if not self.get_setting('login') == 'true' or not (self.get_setting('username') and self.get_setting('password')):
            raise ResolverError('VeeHD requires a username & password')

        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        # two possible playeriframe's: stream and download
        for match in re.finditer(r'playeriframe.+?src\s*:\s*"([^"]+)', html):
            player_url = 'http://%s%s' % (host, match.group(1))
            html = self.net.http_GET(player_url).content

            # if the player html contains an iframe the iframe url has to be gotten and then the player_url tried again
            r = re.search('<iframe.*?src="([^"]+)', html)
            if r:
                frame_url = 'http://%s%s' % (host, r.group(1))
                self.net.http_GET(frame_url)
                html = self.net.http_GET(player_url).content

            patterns = [r'"video/divx"\s+src="([^"]+)', r'"url"\s*:\s*"([^"]+)', 'href="([^"]+(?:mp4|avi))']
            for pattern in patterns:
                r = re.search(pattern, html)
                if r:
                    stream_url = urllib_parse.unquote(r.group(1))
                    return stream_url

        raise ResolverError('File Not Found or Removed') 
Example #27
Source File: vivosx.py    From script.module.urlresolver with GNU General Public License v2.0
def get_media_url(self, host, media_id):
        web_url = self.get_url(host, media_id)
        headers = {'User-Agent': common.RAND_UA,
                   'Referer': web_url}
        html = self.net.http_GET(web_url, headers=headers).content

        r = re.search(r'''InitializeStream.+?source:\s*['"]([^'"]+)''', html, re.DOTALL)

        if r:
            return _rot47(urllib_parse.unquote(r.group(1))) + helpers.append_headers(headers)

        raise ResolverError('Video cannot be located.') 
Example #28
Source File: path.py    From pipenv with MIT License
def url_to_path(url):
    # type: (str) -> str
    """Convert a valid file url to a local filesystem path.

    Follows logic taken from pip's equivalent function
    """

    assert is_file_url(url), "Only file: urls can be converted to local paths"
    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
    # Netlocs are UNC paths
    if netloc:
        netloc = "\\\\" + netloc

    path = urllib_request.url2pathname(netloc + path)
    return urllib_parse.unquote(path)