Python split url
28 Python code examples are found related to "split url".
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: utils.py From pywren-ibm-cloud with Apache License 2.0 | 7 votes |
def split_object_url(obj_url):
    """Split an object URL into ``(sb, bucket, prefix, obj_name)``.

    The accepted shape is ``[<backend>://]<bucket>[/<key>]``.

    :param obj_url: the object URL or bare ``bucket/key`` path.
    :return: a 4-tuple:

        * ``sb`` -- the text before ``://`` (``'cos'`` is reported as
          ``'ibm_cos'``), or ``None`` when the URL has no ``://``;
        * ``bucket`` -- everything up to the first ``/`` of the path;
        * ``prefix`` -- the key up to its last ``/``; when the key ends
          with ``/`` the whole key (trailing slash included) is the prefix;
        * ``obj_name`` -- the part of the key after its last ``/``, or
          ``''`` when the key is empty or ends with ``/``.
    """
    if '://' in obj_url:
        # Split on the first separator only: a key may itself contain
        # '://', which used to break the two-way unpacking.
        sb, path = obj_url.split('://', 1)
    else:
        sb, path = None, obj_url
    if sb == 'cos':
        sb = 'ibm_cos'

    bucket, _, full_key = path.partition('/')

    if full_key.endswith('/'):
        # A key ending in '/' names a "directory": all of it is prefix.
        return sb, bucket, full_key, ''

    # rpartition yields ('', '', key) when the key has no '/', and
    # ('', '', '') for an empty key, covering the remaining cases.
    prefix, _, obj_name = full_key.rpartition('/')
    return sb, bucket, prefix, obj_name
Example 2
Source File: data.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def split_resource_url(resource_url):
    """
    Break a resource url of the form "<protocol>:<path>" into its two parts.

    For the ``file`` protocol any run of leading slashes in the path is
    reduced to a single slash; for the ``nltk`` protocol the path is
    returned untouched; for every other protocol up to two leading
    slashes are removed.

    >>> split_resource_url('nltk:home/nltk')
    ('nltk', 'home/nltk')
    >>> split_resource_url('nltk:/home/nltk')
    ('nltk', '/home/nltk')
    >>> split_resource_url('file:/home/nltk')
    ('file', '/home/nltk')
    >>> split_resource_url('file:///home/nltk')
    ('file', '/home/nltk')
    >>> split_resource_url('file:///C:/home/nltk')
    ('file', '/C:/home/nltk')
    """
    scheme, remainder = resource_url.split(':', 1)
    if scheme == 'file':
        # Collapse "//", "///", ... at the front down to exactly one "/".
        if remainder.startswith('/'):
            remainder = '/' + remainder.lstrip('/')
    elif scheme != 'nltk':
        # Any other protocol: strip at most two leading slashes.
        remainder = re.sub(r'^/{0,2}', '', remainder)
    return scheme, remainder
Example 3
Source File: globalfunctions.py From aeneas with GNU Affero General Public License v3.0 | 6 votes |
def split_url(url):
    """
    Separate a URL of the form ``base#anchor`` into its two parts.

    Only the text between the first and the second ``#`` is kept as the
    anchor, so ``a#b#c`` yields ``(a, b)``.

    :param string url: the url, or ``None``
    :rtype: tuple ``(base, anchor)``; ``anchor`` is ``None`` when the url
            has no ``#``, and both items are ``None`` when ``url`` is ``None``
    """
    if url is None:
        return (None, None)
    base, hash_mark, remainder = url.partition("#")
    if not hash_mark:
        # No "#" at all: there is no anchor to report.
        return (base, None)
    anchor = remainder.partition("#")[0]
    return (base, anchor)
Example 4
Source File: split_url_events.py From aw-core with Mozilla Public License 2.0 | 6 votes |
def split_url_events(events: List[Event]) -> List[Event]:
    """
    Annotate every event that carries a "url" entry with the URL's parts.

    The event's ``data`` mapping is modified in place: the parsed scheme,
    netloc (without a leading "www."), path, params, query and fragment are
    stored under "$protocol", "$domain", "$path", "$params", "$options" and
    "$identifier". Events without a "url" entry are left untouched. The
    same ``events`` list is returned.
    """
    for event in events:
        if "url" not in event.data:
            continue
        parts = urlparse(event.data["url"])
        domain = parts.netloc
        if domain[:4] == "www.":
            domain = domain[4:]
        extracted = (
            ("$protocol", parts.scheme),
            ("$domain", domain),
            ("$path", parts.path),
            ("$params", parts.params),
            ("$options", parts.query),
            ("$identifier", parts.fragment),
        )
        for key, value in extracted:
            event.data[key] = value
        # TODO: Parse user, port etc aswell
    return events
Example 5
Source File: misc.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def split_auth_netloc_from_url(url):
    # type: (str) -> Tuple[str, str, Tuple[str, str]]
    """
    Split a url into the url without auth, its netloc, and its auth.

    The work is delegated to ``_transform_url`` with ``_get_netloc`` as the
    transform; this function only flattens the nested result.

    Returns: (url_without_auth, netloc, (username, password))
    """
    url_without_auth, netloc_and_auth = _transform_url(url, _get_netloc)
    netloc, auth = netloc_and_auth
    return url_without_auth, netloc, auth
Example 6
Source File: s3.py From cli with MIT License | 5 votes |
def split_url(url: urllib.parse.ParseResult) -> Tuple[S3Bucket, str]:
    """
    Splits the given s3:// *url* into a Bucket object and normalized path
    with some sanity checking.

    :param url: an already-parsed URL; ``url.netloc`` is used as the bucket
        name and ``url.path`` as the key prefix.
    :return: ``(bucket, prefix)`` where *prefix* is ``url.path`` with its
        leading slashes removed.
    :raises UserError: if the URL has no bucket name, if S3 credentials are
        missing or incomplete, or if the ``head_bucket`` check on the bucket
        fails with a ``ClientError``.  In the latter two cases the
        underlying exception is attached as ``__cause__``.
    """
    # Require a bucket name
    if not url.netloc:
        raise UserError("No bucket name specified in url (%s)" % url.geturl())

    # Remove leading slashes from any destination path in order to use it as a
    # prefix for uploaded files.  Internal and trailing slashes are untouched.
    prefix = url.path.lstrip("/")

    try:
        bucket = boto3.resource("s3").Bucket(url.netloc)
    except (NoCredentialsError, PartialCredentialsError) as error:
        raise UserError("Unable to authenticate with S3: %s" % error) from error

    # Find the bucket and ensure we have access and that it already exists so
    # we don't automagically create new buckets.
    try:
        boto3.client("s3").head_bucket(Bucket = bucket.name)
    except ClientError as error:
        # Chain the original error, as the credentials handler above does,
        # so the real cause stays visible in tracebacks.
        raise UserError(dedent('''\
            No bucket exists with the name "%s".

            Buckets are not automatically created for safety reasons.
            ''' % bucket.name)) from error

    return bucket, prefix
Example 7
Source File: browser.py From FODI with GNU General Public License v3.0 | 5 votes |
def split_url(url):
    """Return the path of *url* and its query string decoded into a dict.

    The result has two keys: 'path' (the URL path) and 'params' (a dict
    built from the query's key/value pairs; for a repeated key the last
    value wins).
    """
    parts = urlparse(url)
    query_pairs = parse_qsl(parts.query)
    return {'path': parts.path, 'params': dict(query_pairs)}
Example 8
Source File: wrenutil.py From pywren with Apache License 2.0 | 5 votes |
def split_s3_url(s3_url):
    """Split ``s3://<bucket>/<key>`` into ``(bucket_name, key)``.

    The key is everything after the first "/" that follows the bucket
    name, and is "" when there is no such slash.

    Raises ValueError when the URL does not start with "s3://".
    """
    scheme, remainder = s3_url[:5], s3_url[5:]
    if scheme != "s3://":
        raise ValueError("URL {} is not valid".format(s3_url))
    bucket_name, _, key = remainder.partition("/")
    return bucket_name, key
Example 9
Source File: plugin.py From robotframework-seleniumtestability with Apache License 2.0 | 5 votes |
def split_url_to_host_and_path(url: str) -> dict:
    """
    Returns the given url as a dict with two string entries: "base", the url
    with its path removed, and "path", the url's path.

    This is useful when constructing requests sessions from urls used within
    SeleniumLibrary.
    """
    parsed = furl(url)
    # Work on a copy so that removing the path does not affect `parsed`.
    without_path = parsed.copy().remove(path=True)
    return {"base": str(without_path), "path": str(parsed.path)}
Example 10
Source File: utils.py From oreilly-flask-apis-video with MIT License | 5 votes |
def split_url(url, method='GET'):
    """Returns the endpoint name and arguments that match a given URL. In
    other words, this is the reverse of Flask's url_for().

    :param url: the URL to match; only its path is passed to the matcher.
    :param method: the HTTP method passed to the URL adapter's ``match``.
    :return: whatever ``url_adapter.match(path, method)`` returns.
    :raises RuntimeError: if no application context is pushed, or if the
        application context has no URL adapter.
    :raises ValidationError: if the URL names a host other than the
        adapter's ``server_name``, or if no route matches its path.
    """
    appctx = _app_ctx_stack.top
    reqctx = _request_ctx_stack.top
    if appctx is None:
        raise RuntimeError('Attempted to match a URL without the '
                           'application context being pushed. This has to be '
                           'executed when application context is available.')

    # Prefer the adapter bound to the current request, if there is one.
    if reqctx is not None:
        url_adapter = reqctx.url_adapter
    else:
        url_adapter = appctx.url_adapter
        if url_adapter is None:
            raise RuntimeError('Application was not able to create a URL '
                               'adapter for request independent URL matching. '
                               'You might be able to fix this by setting '
                               'the SERVER_NAME config variable.')
    parsed_url = url_parse(url)
    # Compare by value: "is not ''" tested object identity, which only
    # works by interpreter accident and triggers a SyntaxWarning.
    if parsed_url.netloc != '' and \
            parsed_url.netloc != url_adapter.server_name:
        raise ValidationError('Invalid URL: ' + url)
    try:
        result = url_adapter.match(parsed_url.path, method)
    except NotFound:
        raise ValidationError('Invalid URL: ' + url)
    return result
Example 11
Source File: bulkload_client.py From python-compat-runtime with Apache License 2.0 | 5 votes |
def SplitURL(url):
    """Splits an HTTP URL into its host part and its path.

    Args:
      url: String containing a full URL string (e.g.,
        'http://blah.com:8080/stuff?param=1#foo')

    Returns:
      Tuple (netloc, uri) where:
        netloc: String containing the host/port combination from the URL. The
          port is optional. (e.g., 'blah.com:8080').
        uri: String containing the path of the URL (e.g., '/stuff'). The
          scheme, query and fragment are discarded.
    """
    pieces = urlparse.urlsplit(url)
    return pieces.netloc, pieces.path
Example 12
Source File: net.py From luci-py with Apache License 2.0 | 5 votes |
def split_server_request_url(url):
    """Splits the url into scheme+netloc and path+params+query+fragment.

    Returns a pair ``(urlhost, urlpath)``: ``urlhost`` is
    ``"<scheme>://<netloc>"`` and ``urlpath`` is the rest of the URL
    reassembled without scheme and netloc.
    """
    parsed = urllib.parse.urlparse(url)
    scheme, netloc = parsed[0], parsed[1]
    # Blank out the first two components and rebuild only the tail.
    tail_components = ['', '']
    tail_components.extend(parsed[2:])
    urlhost = '{}://{}'.format(scheme, netloc)
    return urlhost, urllib.parse.urlunparse(tail_components)
Example 13
Source File: file_utils.py From flux-ci with MIT License | 5 votes |
def split_url_path(path):
    """
    Divide a URL path into a repository name and the path inside it.

    The first two "/"-separated components form the repository name; the
    remainder (possibly empty) is the path to be listed.

    # Parameters
    path (str): The path from URL.

    # Return
    tuple (str, str): The repository name and the path to be listed.
    """
    # At most two splits: the third item, if present, is the untouched tail.
    pieces = path.split('/', 2)
    repo_name = '/'.join(pieces[:2])
    inner_path = pieces[2] if len(pieces) > 2 else ''
    return repo_name, inner_path
Example 14
Source File: common.py From hivemind with MIT License | 5 votes |
def split_url(url, allow_empty=False):
    """Validate a post url and split it into an (author, permlink) tuple.

    A falsy `url` yields None when `allow_empty` is set and fails an
    assertion otherwise. A non-empty url must be a string made of exactly
    two "/"-separated parts; the first is passed through `valid_account`
    and the second through `valid_permlink`.
    """
    if not url:
        assert allow_empty, 'url must be specified'
        return None
    assert isinstance(url, str), 'url must be a string'

    pieces = url.split('/')
    assert len(pieces) == 2, 'invalid url parts'
    raw_author, raw_permlink = pieces
    return (valid_account(raw_author), valid_permlink(raw_permlink))
Example 15
Source File: censys_search.py From CloudBunny with MIT License | 5 votes |
def split_url(url):
    """Return the network location of an http(s) URL.

    Input that does not start with "http:" or "https:" is returned
    unchanged.
    """
    if not re.match(r'http(s?)\:', url):
        return url
    return urlsplit(url).netloc
Example 16
Source File: http.py From honeything with GNU General Public License v3.0 | 5 votes |
# Result record for SplitUrl. Built once at import time: creating the
# namedtuple class inside the function rebuilt it on every call and gave
# each result a distinct type.
_Url = collections.namedtuple('Url', ('method host port path'))


def SplitUrl(url):
    """Split *url* into a ``Url(method, host, port, path)`` named tuple.

    ``method`` is the URL scheme as returned by ``urllib.splittype``;
    ``host`` and ``path`` come from ``urllib.splithost`` and
    ``urllib.splitport``; ``port`` is an int, and is 0 when the URL
    carries no port.
    """
    method, rest = urllib.splittype(url)
    hostport, path = urllib.splithost(rest)
    host, port = urllib.splitport(hostport)
    # splitport returns None for a missing port; report that as 0.
    return _Url(method, host, int(port or 0), path)
Example 17
Source File: utils.py From fence with Apache License 2.0 | 5 votes |
def split_url_and_query_params(url):
    """Separate *url* from its query string.

    Returns ``(url_without_query, query_params)``: the first item is the
    URL rebuilt from its scheme, netloc, path and fragment; the second is
    the query string parsed with ``parse_qs`` (a dict mapping each key to
    a list of values).
    """
    parts = urlsplit(url)
    query_params = parse_qs(parts.query)
    stripped_url = urlunsplit(
        (parts.scheme, parts.netloc, parts.path, None, parts.fragment)
    )
    return stripped_url, query_params
Example 18
Source File: boltdepot.py From cqparts with Apache License 2.0 | 5 votes |
def split_url(url):
    """Split *url* at its last ``?`` into ``(base, params)``.

    :param url: the URL string.
    :return: a tuple ``(base, params)`` where *base* is the text before the
        last ``?`` and *params* is a dict of the ``key=value`` pairs that
        follow it (separated by ``&``).  A URL without ``?`` is returned
        whole with an empty dict.  Each pair is split on its first ``=``
        only, a pair without ``=`` maps its key to ``''``, and empty pairs
        are ignored.
    """
    match = re.search(r'^(?P<base>.*)\?(?P<params>.*)$', url, flags=re.I)
    if match is None:
        # No query string: previously this crashed on match.group().
        return (url, {})

    params = {}
    for pair in match.group('params').split('&'):
        if not pair:
            # "url?" or "a=1&&b=2" produce empty pairs; skip them.
            continue
        # partition keeps any further '=' inside the value.
        key, _, value = pair.partition('=')
        params[key] = value
    return (match.group('base'), params)
Example 19
Source File: dsio.py From xcube with MIT License | 5 votes |
def split_obs_url(path: str) -> Tuple[Optional[str], str]:
    """
    Split *path* into ``(endpoint_url, root)`` when it is a URL.

    *path* counts as a URL when it parses to a non-empty scheme, host and
    path and its scheme is not 's3'. In that case ``endpoint_url`` is
    ``<scheme>://<host>`` (plus ``:<port>`` if a port is given) and
    ``root`` is the URL path without one leading slash. In every other
    case the result is ``(None, path)``.
    """
    url = urllib3.util.parse_url(path)
    is_endpoint_url = bool(url.scheme and url.host and url.path)
    if not is_endpoint_url or url.scheme == 's3':
        return None, path

    endpoint_url = f'{url.scheme}://{url.host}'
    if url.port is not None:
        endpoint_url += f':{url.port}'

    root = url.path
    # Strip a single leading slash only; any further slashes are kept.
    root = root[1:] if root.startswith('/') else root
    return endpoint_url, root
Example 20
Source File: util.py From Computable with MIT License | 5 votes |
def split_url(url):
    """Break a zmq url (tcp://ip:port) into the tuple ('tcp','ip','port').

    The port is returned as a string. An AssertionError is raised unless
    the url contains exactly one '://' and the part after it exactly one
    ':'.
    """
    scheme_and_rest = url.split('://')
    assert len(scheme_and_rest) == 2, 'Invalid url: %r'%url
    proto, location = scheme_and_rest

    host_and_port = location.split(':')
    assert len(host_and_port) == 2, 'Invalid url: %r'%url
    host, port_text = host_and_port

    return proto, host, port_text
Example 21
Source File: tools.py From JobFunnel with MIT License | 5 votes |
def split_url(url):
    """Extract protocol, host and port from an http(s) url.

    Returns a dict with the keys 'protocol', 'ip_address' and 'port', or
    None when the url does not match ``http[s]://<host>:[<port>]...``.
    The colon after the host is required but the digits after it are
    optional, so 'port' may be None.
    """
    # groups: 1 = protocol, 2 = host / ip address, 3 = port (optional)
    match = re.match(r'^(http[s]?):\/\/([A-Za-z0-9.]+):([0-9]+)?(.*)$', url)
    if match is None:
        return None
    protocol, ip_address, port = match.group(1, 2, 3)
    return {
        'protocol': protocol,
        'ip_address': ip_address,
        'port': port,
    }
Example 22
Source File: utils.py From URLNet with Apache License 2.0 | 4 votes |
def split_url(line, part):
    """Break a URL string into domain, path and query pieces.

    A leading "http://", "https://" or "ftp://" and a leading "www." are
    removed first. The remainder is divided at its first "/" into a primary
    domain and a path part, and the path part is further divided into
    sub-directory, file name, file extension and query argument.

    :param line: the URL string.
    :param part: selects what is returned:
        'pd' -> primary domain, 'path' -> path token,
        'argument' -> text after '?', 'sub_dir' -> path token up to its
        last '/', 'filename' -> last path component without extension,
        'fe' -> file extension, 'others' -> path token plus '?' and the
        argument when the argument is non-empty.
        Any other value returns the 6-tuple
        (primarydomain, pathtoken, argument, sub_dir, filename,
        file_extension).
    """
    # Strip scheme and "www." prefixes. The checks run one after another,
    # so "http://www." is removed in two steps.
    if line.startswith("http://"):
        line=line[7:]
    if line.startswith("https://"):
        line=line[8:]
    if line.startswith("ftp://"):
        line=line[6:]
    if line.startswith("www."):
        line = line[4:]
    slash_pos = line.find('/')
    if slash_pos > 0 and slash_pos < len(line)-1: # line = "fsdfsdf/sdfsdfsd"
        primarydomain = line[:slash_pos]
        path_argument = line[slash_pos+1:]
        path_argument_tokens = path_argument.split('/')
        pathtoken = "/".join(path_argument_tokens[:-1])
        last_pathtoken = path_argument_tokens[-1]
        # Path ends with "/" (empty last token): treat the token before it
        # as the last one instead.
        if len(path_argument_tokens) > 2 and last_pathtoken == '':
            pathtoken = "/".join(path_argument_tokens[:-2])
            last_pathtoken = path_argument_tokens[-2]
        # Only the last path token is searched for a "?" query separator.
        question_pos = last_pathtoken.find('?')
        if question_pos != -1:
            argument = last_pathtoken[question_pos+1:]
            pathtoken = pathtoken + "/" + last_pathtoken[:question_pos]
        else:
            argument = ""
            pathtoken = pathtoken + "/" + last_pathtoken
        last_slash_pos = pathtoken.rfind('/')
        sub_dir = pathtoken[:last_slash_pos]
        filename = pathtoken[last_slash_pos+1:]
        # The extension is whatever follows the last "." of the file name.
        file_last_dot_pos = filename.rfind('.')
        if file_last_dot_pos != -1:
            file_extension = filename[file_last_dot_pos+1:]
            filename = filename[:file_last_dot_pos]
        else:
            file_extension = ""
    elif slash_pos == 0: # line = "/fsdfsdfsdfsdfsd"
        primarydomain = line[1:]
        pathtoken = ""
        argument = ""
        sub_dir = ""
        filename = ""
        file_extension = ""
    elif slash_pos == len(line)-1: # line = "fsdfsdfsdfsdfsd/"
        primarydomain = line[:-1]
        pathtoken = ""
        argument = ""
        sub_dir = ""
        filename = ""
        file_extension = ""
    else: # line = "fsdfsdfsdfsdfsd"
        primarydomain = line
        pathtoken = ""
        argument = ""
        sub_dir = ""
        filename = ""
        file_extension = ""
    # Return the single piece requested, or everything for an unknown `part`.
    if part == 'pd':
        return primarydomain
    elif part == 'path':
        return pathtoken
    elif part == 'argument':
        return argument
    elif part == 'sub_dir':
        return sub_dir
    elif part == 'filename':
        return filename
    elif part == 'fe':
        return file_extension
    elif part == 'others':
        if len(argument) > 0:
            return pathtoken + '?' + argument
        else:
            return pathtoken
    else:
        return primarydomain, pathtoken, argument, sub_dir, filename, file_extension