Python split url

28 Python code examples related to "split url" were found. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: utils.py    From pywren-ibm-cloud with Apache License 2.0 7 votes vote down vote up
def split_object_url(obj_url):
    """
    Split an object URL into storage backend, bucket, prefix and object name.

    Accepted forms are ``<backend>://<bucket>/<key>`` and ``<bucket>/<key>``.
    The ``cos`` backend is reported as ``ibm_cos``.

    :param obj_url: object URL or plain ``bucket/key`` path
    :return: tuple ``(sb, bucket, prefix, obj_name)``; ``sb`` is ``None`` when
        the URL carries no ``://`` separator, and absent parts are ``''``
    """
    if '://' in obj_url:
        # Split on the first separator only, so a key that itself contains
        # '://' does not break the two-value unpacking.
        sb, path = obj_url.split('://', 1)
    else:
        sb = None
        path = obj_url

    sb = 'ibm_cos' if sb == 'cos' else sb

    bucket, full_key = path.split('/', 1) if '/' in path else (path, '')

    if full_key.endswith('/'):
        # A key with a trailing slash is all prefix and names no object.
        prefix = full_key
        obj_name = ''
    elif full_key:
        prefix, obj_name = full_key.rsplit('/', 1) if '/' in full_key else ('', full_key)
    else:
        prefix = ''
        obj_name = ''

    return sb, bucket, prefix, obj_name
Example 2
Source File: data.py    From razzy-spinner with GNU General Public License v3.0 6 votes vote down vote up
def split_resource_url(resource_url):
    """
    Break a resource url of the form "<protocol>:<path>" into its two parts.

    The path of an ``nltk`` url is returned untouched.  For a ``file`` url
    any run of leading slashes collapses into a single one.  For every other
    protocol up to two leading slashes are removed from the path.

    >>> split_resource_url('nltk:home/nltk')
    ('nltk', 'home/nltk')
    >>> split_resource_url('nltk:/home/nltk')
    ('nltk', '/home/nltk')
    >>> split_resource_url('file:/home/nltk')
    ('file', '/home/nltk')
    >>> split_resource_url('file:///home/nltk')
    ('file', '/home/nltk')
    >>> split_resource_url('file:///C:/home/nltk')
    ('file', '/C:/home/nltk')
    """
    # Unpacking raises ValueError when the url holds no ':' at all.
    protocol, path_ = resource_url.split(':', 1)
    if protocol == 'file':
        if path_.startswith('/'):
            path_ = '/' + path_.lstrip('/')
    elif protocol != 'nltk':
        path_ = re.sub(r'^/{0,2}', '', path_)
    return protocol, path_
Example 3
Source File: globalfunctions.py    From aeneas with GNU Affero General Public License v3.0 6 votes vote down vote up
def split_url(url):
    """
    Separate a URL of the form ``base#anchor`` into ``(base, anchor)``.

    When the URL has no ``#`` the anchor is ``None``.
    When it has several, only the first two tokens are kept:
    ``a#b#c => (a, b)``.
    A ``None`` URL yields ``(None, None)``.

    :param string url: the url
    :rtype: tuple
    """
    if url is None:
        return (None, None)
    tokens = url.split("#")
    base = tokens[0]
    anchor = tokens[1] if len(tokens) > 1 else None
    return (base, anchor)
Example 4
Source File: split_url_events.py    From aw-core with Mozilla Public License 2.0 6 votes vote down vote up
def split_url_events(events: List[Event]) -> List[Event]:
    """
    Add parsed URL components to every event whose data holds a "url" key.

    The events are modified in place; events without a "url" are left as
    they are.  The same list that was passed in is returned.
    """
    for event in events:
        if "url" not in event.data:
            continue
        parts = urlparse(event.data["url"])
        netloc = parts.netloc
        event.data["$protocol"] = parts.scheme
        # A leading "www." is dropped from the domain
        event.data["$domain"] = netloc[4:] if netloc.startswith("www.") else netloc
        event.data["$path"] = parts.path
        event.data["$params"] = parts.params
        event.data["$options"] = parts.query
        event.data["$identifier"] = parts.fragment
        # TODO: Parse user, port etc. as well
    return events
Example 5
Source File: misc.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def split_auth_netloc_from_url(url):
    # type: (str) -> Tuple[str, str, Tuple[str, str]]
    """
    Split a url into the url without auth, its netloc, and its auth info.

    The work is delegated to ``_transform_url`` with ``_get_netloc`` as the
    transform; this function only flattens the nested result.

    Returns: (url_without_auth, netloc, (username, password))
    """
    stripped_url, netloc_and_auth = _transform_url(url, _get_netloc)
    netloc, auth = netloc_and_auth
    return stripped_url, netloc, auth
Example 6
Source File: s3.py    From cli with MIT License 5 votes vote down vote up
def split_url(url: urllib.parse.ParseResult) -> Tuple[S3Bucket, str]:
    """
    Splits the given s3:// *url* into a Bucket object and normalized path
    with some sanity checking.

    The bucket name is taken from ``url.netloc`` and the prefix is
    ``url.path`` without its leading slashes.

    Raises ``UserError`` when the url has no bucket name, when the boto3
    credentials are missing or incomplete, or when the ``head_bucket`` check
    fails with a ``ClientError``.  In the latter two cases the original
    boto error is chained as the cause.
    """
    # Require a bucket name
    if not url.netloc:
        raise UserError("No bucket name specified in url (%s)" % url.geturl())

    # Remove leading slashes from any destination path in order to use it as a
    # prefix for uploaded files.  Internal and trailing slashes are untouched.
    prefix = url.path.lstrip("/")

    try:
        bucket = boto3.resource("s3").Bucket(url.netloc)

    except (NoCredentialsError, PartialCredentialsError) as error:
        raise UserError("Unable to authenticate with S3: %s" % error) from error

    # Find the bucket and ensure we have access and that it already exists so
    # we don't automagically create new buckets.
    try:
        boto3.client("s3").head_bucket(Bucket = bucket.name)

    except ClientError as error:
        # Chain the ClientError (as the credentials handler above does) so
        # the underlying reason is not lost from the traceback.
        raise UserError(dedent('''\
            No bucket exists with the name "%s".

            Buckets are not automatically created for safety reasons.
            ''' % bucket.name)) from error

    return bucket, prefix
Example 7
Source File: browser.py    From FODI with GNU General Public License v3.0 5 votes vote down vote up
def split_url(url):
    """
    Return the path of *url* and its query string decoded into a dict.

    The result has the keys 'path' and 'params'.
    """
    parsed = urlparse(url)
    query_pairs = parse_qsl(parsed.query)
    return dict(path=parsed.path, params=dict(query_pairs))
Example 8
Source File: wrenutil.py    From pywren with Apache License 2.0 5 votes vote down vote up
def split_s3_url(s3_url):
    """
    Split an ``s3://bucket/key`` URL into ``(bucket_name, key)``.

    The key is everything after the first slash that follows the bucket
    name, and is empty when there is no such slash.
    Raises ValueError when the URL does not begin with ``s3://``.
    """
    scheme = "s3://"
    if s3_url[:len(scheme)] != scheme:
        raise ValueError("URL {} is not valid".format(s3_url))

    bucket_name, _, key = s3_url[len(scheme):].partition("/")
    return bucket_name, key
Example 9
Source File: plugin.py    From robotframework-seleniumtestability with Apache License 2.0 5 votes vote down vote up
def split_url_to_host_and_path(url: str) -> dict:
    """
    Returns the given url as a dict with two string entries: "base" is the url with its
    path removed and "path" is the path of the url.
    This is useful when constructing requests sessions from urls used within SeleniumLibrary.
    """
    parsed = furl(url)
    # Work on a copy so that removing the path does not touch `parsed`.
    base_only = parsed.copy().remove(path=True)
    return {"base": str(base_only), "path": str(parsed.path)}
Example 10
Source File: utils.py    From oreilly-flask-apis-video with MIT License 5 votes vote down vote up
def split_url(url, method='GET'):
    """Returns the endpoint name and arguments that match a given URL. In
    other words, this is the reverse of Flask's url_for().

    The URL adapter of the current request context is used when one is
    pushed, otherwise the adapter of the application context.

    Raises RuntimeError when no application context is pushed, or when the
    application context has no URL adapter. Raises ValidationError when the
    URL names a host other than the adapter's server name, or when its path
    does not match any route for the given method."""
    appctx = _app_ctx_stack.top
    reqctx = _request_ctx_stack.top
    if appctx is None:
        raise RuntimeError('Attempted to match a URL without the '
                           'application context being pushed. This has to be '
                           'executed when application context is available.')

    if reqctx is not None:
        url_adapter = reqctx.url_adapter
    else:
        url_adapter = appctx.url_adapter
        if url_adapter is None:
            raise RuntimeError('Application was not able to create a URL '
                               'adapter for request independent URL matching. '
                               'You might be able to fix this by setting '
                               'the SERVER_NAME config variable.')
    parsed_url = url_parse(url)
    # Compare by value: an identity test against a string literal depends on
    # interning and triggers a SyntaxWarning on Python 3.8+.
    if parsed_url.netloc != '' and \
                    parsed_url.netloc != url_adapter.server_name:
        raise ValidationError('Invalid URL: ' + url)
    try:
        result = url_adapter.match(parsed_url.path, method)
    except NotFound:
        raise ValidationError('Invalid URL: ' + url)
    return result
Example 11
Source File: bulkload_client.py    From python-compat-runtime with Apache License 2.0 5 votes vote down vote up
def SplitURL(url):
  """Breaks an HTTP URL down to its host part and its path.

  Args:
    url: String containing a full URL string (e.g.,
      'http://blah.com:8080/stuff?param=1#foo')

  Returns:
    Tuple (netloc, uri) where:
      netloc: String containing the host/port combination from the URL. The
        port is optional. (e.g., 'blah.com:8080').
      uri: String containing the path of the URL, without query string or
        fragment. (e.g., '/stuff').
  """
  pieces = urlparse.urlsplit(url)
  return pieces.netloc, pieces.path
Example 12
Source File: net.py    From luci-py with Apache License 2.0 5 votes vote down vote up
def split_server_request_url(url):
  """Returns (scheme://netloc, path+params+query+fragment) for the url."""
  scheme, netloc, *remainder = urllib.parse.urlparse(url)
  urlhost = '%s://%s' % (scheme, netloc)
  # Reassemble everything after the host by blanking scheme and netloc.
  urlpath = urllib.parse.urlunparse(['', '', *remainder])
  return urlhost, urlpath
Example 13
Source File: file_utils.py    From flux-ci with MIT License 5 votes vote down vote up
def split_url_path(path):
  """
  Splits a URL path into the repository name and the path inside it.

  The repository name is made of the first two slash-separated segments;
  whatever follows the second slash is the remaining path.

  # Parameters
  path (str): The path from URL.

  # Return
  tuple (str, str): The repository name and the path to be listed.
  """

  separator = '/'
  # At most two splits: the third item, if any, is the untouched remainder.
  segments = path.split(separator, 2)
  repository = separator.join(segments[:2])
  remainder = segments[2] if len(segments) > 2 else ''
  return repository, remainder
Example 14
Source File: common.py    From hivemind with MIT License 5 votes vote down vote up
def split_url(url, allow_empty=False):
    """Check a post url of the form `author/permlink` and return both parts.

    A falsy url gives None when `allow_empty` is set and fails an assertion
    otherwise. Assertions also fail for a non-string url and for a url that
    does not consist of exactly two slash-separated parts. The two parts are
    passed through `valid_account` and `valid_permlink` respectively."""
    if not url:
        assert allow_empty, 'url must be specified'
        return None
    assert isinstance(url, str), 'url must be a string'

    pieces = url.split('/')
    assert len(pieces) == 2, 'invalid url parts'
    raw_author, raw_permlink = pieces

    return (valid_account(raw_author), valid_permlink(raw_permlink))
Example 15
Source File: censys_search.py    From CloudBunny with MIT License 5 votes vote down vote up
def split_url(url):
    """
    Return the network location of an http(s) url.

    Anything that does not start with ``http:`` or ``https:`` is returned
    unchanged.
    """
    if not re.match(r'http(s?)\:', url):
        return url
    return urlsplit(url).netloc
Example 16
Source File: http.py    From honeything with GNU General Public License v3.0 5 votes vote down vote up
def SplitUrl(url):
  """Returns a Url(method, host, port, path) named tuple for the url.

  The port is converted to an int and is 0 when the url has none.
  """
  scheme, remainder = urllib.splittype(url)
  netloc, path = urllib.splithost(remainder)
  host, port = urllib.splitport(netloc)
  Url = collections.namedtuple('Url', ('method host port path'))
  return Url(method=scheme, host=host, port=int(port or 0), path=path)
Example 17
Source File: utils.py    From fence with Apache License 2.0 5 votes vote down vote up
def split_url_and_query_params(url):
    """
    Return the url with its query string removed, plus the parsed query.

    The query parameters come back as produced by ``parse_qs``, i.e. a dict
    that maps each name to the list of its values.
    """
    parts = urlsplit(url)
    query_params = parse_qs(parts.query)
    stripped_url = urlunsplit(
        (parts.scheme, parts.netloc, parts.path, None, parts.fragment)
    )
    return stripped_url, query_params
Example 18
Source File: boltdepot.py    From cqparts with Apache License 2.0 5 votes vote down vote up
def split_url(url):
    """
    Split *url* at its last ``?`` into the base and a dict of parameters.

    A url without any ``?`` is returned whole with an empty dict.  Each
    ``&``-separated parameter is split at its first ``=`` only, so values may
    contain ``=``; a parameter without ``=`` gets an empty value and empty
    parameters are skipped.  When a key occurs more than once the last
    occurrence wins.

    :param url: the url to split
    :return: tuple ``(base, params)`` where ``params`` maps str to str
    """
    match = re.search(r'^(?P<base>.*)\?(?P<params>.*)$', url, flags=re.I)
    if match is None:
        # No query string at all: nothing to parse.
        return (url, {})

    params = {}
    for pair in match.group('params').split('&'):
        if not pair:
            # Produced by an empty query or by a stray '&'.
            continue
        key, _, value = pair.partition('=')
        params[key] = value
    return (match.group('base'), params)
Example 19
Source File: dsio.py    From xcube with MIT License 5 votes vote down vote up
def split_obs_url(path: str) -> Tuple[Optional[str], str]:
    """
    Return (endpoint_url, root) if *path* is a URL with scheme, host and path
    whose scheme is not "s3"; otherwise return (None, *path*).

    The endpoint URL is made of scheme, host and, when given, the port.
    The root is the URL path without its first leading slash.
    """
    url = urllib3.util.parse_url(path)
    has_all_parts = all((url.scheme, url.host, url.path))
    if not has_all_parts or url.scheme == 's3':
        return None, path

    endpoint_url = (
        f'{url.scheme}://{url.host}'
        if url.port is None
        else f'{url.scheme}://{url.host}:{url.port}'
    )
    root = url.path[1:] if url.path.startswith('/') else url.path
    return endpoint_url, root
Example 20
Source File: util.py    From Computable with MIT License 5 votes vote down vote up
def split_url(url):
    """Break a zmq url (tcp://ip:port) up into ('tcp','ip','port').

    The port is returned as a string. An assertion fails unless the url
    holds exactly one '://' and the address after it exactly one ':'.
    """
    pieces = url.split('://')
    assert len(pieces) == 2, 'Invalid url: %r'%url
    proto, location = pieces

    host_and_port = location.split(':')
    assert len(host_and_port) == 2, 'Invalid url: %r'%url
    host, s_port = host_and_port

    return proto, host, s_port
Example 21
Source File: tools.py    From JobFunnel with MIT License 5 votes vote down vote up
def split_url(url):
    """
    Extract protocol, address and port from an http(s) url.

    Returns a dict with the keys 'protocol', 'ip_address' and 'port', or
    None when the url does not look like ``http[s]://<address>:[port]...``.
    The colon after the address is required but the port digits are not, so
    'port' may be None.
    """
    match = re.match(r'^(http[s]?):\/\/([A-Za-z0-9.]+):([0-9]+)?(.*)$', url)
    if match is None:
        return None

    protocol, ip_address, port = match.group(1, 2, 3)
    return {
        'protocol': protocol,
        'ip_address': ip_address,
        'port': port,
    }
Example 22
Source File: utils.py    From URLNet with Apache License 2.0 4 votes vote down vote up
def split_url(line, part):
    """
    Cut a URL string into domain, path, query and file components.

    A leading ``http://``, ``https://`` or ``ftp://`` and then a leading
    ``www.`` are removed first.  The text before the first slash is the
    primary domain.  When something follows that slash, it is split into
    the path, the argument (the text after a ``?`` in the last path
    segment), the sub directory, the file name and the file extension.
    Otherwise all of those are empty strings.

    :param line: the URL text
    :param part: selects what is returned: 'pd' (primary domain), 'path',
        'argument', 'sub_dir', 'filename', 'fe' (file extension) or 'others'
        (path, plus '?' and the argument when there is one); any other value
        returns the tuple (primarydomain, pathtoken, argument, sub_dir,
        filename, file_extension)
    """
    for scheme in ("http://", "https://", "ftp://"):
        if line.startswith(scheme):
            line = line[len(scheme):]
    if line.startswith("www."):
        line = line[4:]

    # Defaults for every case in which there is no path to analyse.
    pathtoken = argument = sub_dir = filename = file_extension = ""

    slash_pos = line.find('/')
    last_index = len(line) - 1

    if 0 < slash_pos < last_index:  # line = "fsdfsdf/sdfsdfsd"
        primarydomain = line[:slash_pos]
        tokens = line[slash_pos+1:].split('/')
        # With more than two segments, one trailing empty segment (from a
        # final slash) is dropped before the last segment is picked.
        if len(tokens) > 2 and tokens[-1] == '':
            tokens = tokens[:-1]
        leaf, question_mark, query = tokens[-1].partition('?')
        if question_mark:
            argument = query
        pathtoken = "/".join(tokens[:-1]) + "/" + leaf
        # pathtoken always holds a slash here, so this splits at the last one.
        sub_dir, _, filename = pathtoken.rpartition('/')
        stem, dot, suffix = filename.rpartition('.')
        if dot:
            filename = stem
            file_extension = suffix
    elif slash_pos == 0:  # line = "/fsdfsdfsdfsdfsd"
        primarydomain = line[1:]
    elif slash_pos == last_index:  # line = "fsdfsdfsdfsdfsd/"
        primarydomain = line[:-1]
    else:  # line = "fsdfsdfsdfsdfsd"
        primarydomain = line

    if part == 'pd':
        return primarydomain
    if part == 'path':
        return pathtoken
    if part == 'argument':
        return argument
    if part == 'sub_dir':
        return sub_dir
    if part == 'filename':
        return filename
    if part == 'fe':
        return file_extension
    if part == 'others':
        return pathtoken + '?' + argument if argument else pathtoken
    return primarydomain, pathtoken, argument, sub_dir, filename, file_extension