Python six.moves.urllib.parse.urljoin() Examples
The following are 27 code examples of six.moves.urllib.parse.urljoin(), collected from open-source projects.
You may also want to check out all available functions and classes of the module six.moves.urllib.parse.
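Before the examples, a quick refresher on urljoin() semantics may be useful, since most of the snippets below depend on them. urljoin(base, url) resolves url against base: a relative path replaces the last segment of the base path, a path starting with '/' keeps only the scheme and host of the base, and an absolute URL replaces the base entirely. A minimal sketch (all URLs are made up for illustration):

from six.moves.urllib.parse import urljoin

# A relative path replaces the last segment of the base path.
urljoin('http://example.com/one/two', 'three')    # -> 'http://example.com/one/three'

# With a trailing slash on the base, the relative path is appended instead.
urljoin('http://example.com/one/two/', 'three')   # -> 'http://example.com/one/two/three'

# A leading '/' discards the base path but keeps scheme and host.
urljoin('http://example.com/one/two/', '/three')  # -> 'http://example.com/three'

# An absolute URL wins outright.
urljoin('http://example.com/one/', 'http://other.example/x')  # -> 'http://other.example/x'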
Example #1
Source File: monzo_api.py From pymonzo with MIT License
def _get_oauth_token(self):
    """
    Get Monzo access token via OAuth2 `authorization code` grant type.

    Official docs:
        https://monzo.com/docs/#acquire-an-access-token

    :returns: OAuth 2 access token
    :rtype: dict
    """
    url = urljoin(self.api_url, '/oauth2/token')

    oauth = OAuth2Session(
        client_id=self._client_id,
        redirect_uri=config.REDIRECT_URI,
    )

    token = oauth.fetch_token(
        token_url=url,
        code=self._auth_code,
        client_secret=self._client_secret,
    )

    return token
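Note the leading slash in '/oauth2/token': it anchors the joined path at the root of the host in self.api_url, so any path component the base URL carries is dropped. A small sketch of that behaviour (the base URL here is hypothetical):

from six.moves.urllib.parse import urljoin

# The leading '/' replaces whatever path the base URL carries.
urljoin('https://api.monzo.com/v1/', '/oauth2/token')  # -> 'https://api.monzo.com/oauth2/token'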
Example #2
Source File: utils.py From scrape with MIT License
def clean_url(url, base_url=None):
    """Add base netloc and path to internal URLs and remove www, fragments."""
    parsed_url = urlparse(url)

    fragment = "{url.fragment}".format(url=parsed_url)
    if fragment:
        url = url.split(fragment)[0]

    # Identify internal URLs and fix their format
    netloc = "{url.netloc}".format(url=parsed_url)
    if base_url is not None and not netloc:
        parsed_base = urlparse(base_url)
        split_base = "{url.scheme}://{url.netloc}{url.path}/".format(url=parsed_base)
        url = urljoin(split_base, url)
        netloc = "{url.netloc}".format(url=urlparse(url))

    if "www." in netloc:
        url = url.replace(netloc, netloc.replace("www.", ""))

    return url.rstrip(string.punctuation)
Example #3
Source File: regex.py From learn_python3_spider with MIT License
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    def clean_text(text):
        return replace_escape_chars(remove_tags(text.decode(response_encoding))).strip()

    def clean_url(url):
        clean_url = ''
        try:
            clean_url = urljoin(base_url, replace_entities(clean_link(url.decode(response_encoding))))
        except ValueError:
            pass
        return clean_url

    if base_url is None:
        base_url = get_base_url(response_text, response_url, response_encoding)

    links_text = linkre.findall(response_text)
    return [Link(clean_url(url).encode(response_encoding),
                 clean_text(text))
            for url, _, text in links_text]
Example #4
Source File: lxmlhtml.py From learn_python3_spider with MIT License
def _extract_links(self, selector, response_url, response_encoding, base_url):
    links = []
    # hacky way to get the underlying lxml parsed document
    for el, attr, attr_val in self._iter_links(selector.root):
        # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
        try:
            if self.strip:
                attr_val = strip_html5_whitespace(attr_val)
            attr_val = urljoin(base_url, attr_val)
        except ValueError:
            continue  # skipping bogus links
        else:
            url = self.process_attr(attr_val)
            if url is None:
                continue
        url = to_native_str(url, encoding=response_encoding)
        # to fix relative links after process_value
        url = urljoin(response_url, url)
        link = Link(url, _collect_string_content(el) or u'',
                    nofollow=rel_has_nofollow(el.get('rel')))
        links.append(link)
    return self._deduplicate_if_needed(links)
Example #5
Source File: sgml.py From learn_python3_spider with MIT License
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    """ Do the real extraction work """
    self.reset()
    self.feed(response_text)
    self.close()

    ret = []
    if base_url is None:
        base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
    for link in self.links:
        if isinstance(link.url, six.text_type):
            link.url = link.url.encode(response_encoding)
        try:
            link.url = urljoin(base_url, link.url)
        except ValueError:
            continue
        link.url = safe_url_string(link.url, response_encoding)
        link.text = to_unicode(link.text, response_encoding, errors='replace').strip()
        ret.append(link)

    return ret
Example #6
Source File: htmlparser.py From learn_python3_spider with MIT License
def _extract_links(self, response_text, response_url, response_encoding):
    self.reset()
    self.feed(response_text)
    self.close()

    links = unique_list(self.links, key=lambda link: link.url) if self.unique else self.links

    ret = []
    base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
    for link in links:
        if isinstance(link.url, six.text_type):
            link.url = link.url.encode(response_encoding)
        try:
            link.url = urljoin(base_url, link.url)
        except ValueError:
            continue
        link.url = safe_url_string(link.url, response_encoding)
        link.text = link.text.decode(response_encoding)
        ret.append(link)

    return ret
Example #7
Source File: lxmlhtml.py From scrapy-cluster with MIT License
def _extract_links(self, selector, response_url, response_encoding, base_url):
    '''
    Pretty much the same function, just added 'ignore' to to_native_str()
    '''
    links = []
    # hacky way to get the underlying lxml parsed document
    for el, attr, attr_val in self._iter_links(selector.root):
        # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
        try:
            attr_val = urljoin(base_url, attr_val)
        except ValueError:
            continue  # skipping bogus links
        else:
            url = self.process_attr(attr_val)
            if url is None:
                continue

        # added 'ignore' to encoding errors
        url = to_native_str(url, encoding=response_encoding, errors='ignore')

        # to fix relative links after process_value
        url = urljoin(response_url, url)
        link = Link(url, _collect_string_content(el) or u'',
                    nofollow=rel_has_nofollow(el.get('rel')))
        links.append(link)

    return self._deduplicate_if_needed(links)
Example #8
Source File: test_monzo_api.py From pymonzo with MIT License
def test_class_get_oauth_token_method(self, mocker, mocked_monzo):
    """Test class `_get_oauth_token` method"""
    mocked_fetch_token = mocker.MagicMock()
    mocked_oauth2_session = mocker.patch('pymonzo.monzo_api.OAuth2Session')
    mocked_oauth2_session.return_value.fetch_token = mocked_fetch_token

    token = mocked_monzo._get_oauth_token()

    assert token == mocked_fetch_token.return_value

    mocked_oauth2_session.assert_called_once_with(
        client_id=mocked_monzo._client_id,
        redirect_uri=config.REDIRECT_URI,
    )

    mocked_fetch_token.assert_called_once_with(
        token_url=urljoin(mocked_monzo.api_url, '/oauth2/token'),
        code=mocked_monzo._auth_code,
        client_secret=mocked_monzo._client_secret,
    )
Example #9
Source File: connector.py From designate with Apache License 2.0
def _construct_url(self, relative_path, query_params=None, extattrs=None):
    if query_params is None:
        query_params = {}
    if extattrs is None:
        extattrs = {}

    if not relative_path or relative_path[0] == '/':
        raise ValueError('Path in request must be relative.')
    query = ''
    if query_params or extattrs:
        query = '?'

    if extattrs:
        attrs_queries = []
        for key, value in extattrs.items():
            LOG.debug("key: %s, value: %s", key, value)
            attrs_queries.append('*' + key + '=' + value['value'])
        query += '&'.join(attrs_queries)
    if query_params:
        if len(query) > 1:
            query += '&'
        query += parse.urlencode(query_params)

    baseurl = parse.urljoin(self.wapi_url, parse.quote(relative_path))
    return baseurl + query
Example #10
Source File: redirect.py From learn_python3_spider with MIT License
def process_response(self, request, response, spider):
    if (request.meta.get('dont_redirect', False) or
            response.status in getattr(spider, 'handle_httpstatus_list', []) or
            response.status in request.meta.get('handle_httpstatus_list', []) or
            request.meta.get('handle_httpstatus_all', False)):
        return response

    allowed_status = (301, 302, 303, 307, 308)
    if 'Location' not in response.headers or response.status not in allowed_status:
        return response

    location = safe_url_string(response.headers['location'])

    redirected_url = urljoin(request.url, location)

    if response.status in (301, 307, 308) or request.method == 'HEAD':
        redirected = request.replace(url=redirected_url)
        return self._redirect(redirected, request, spider, response.status)

    redirected = self._redirect_request_using_get(request, redirected_url)
    return self._redirect(redirected, request, spider, response.status)
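The Location header of an HTTP redirect may be a relative reference, so the middleware resolves it against the request URL before following it. A short sketch (URLs invented):

from six.moves.urllib.parse import urljoin

urljoin('http://example.com/a/b', '/login')  # root-relative -> 'http://example.com/login'
urljoin('http://example.com/a/b', 'c')       # relative      -> 'http://example.com/a/c'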
Example #11
Source File: uploads.py From conda-concourse-ci with BSD 3-Clause "New" or "Revised" License
def get_upload_channels(upload_config_dir, subdir, channels=None):
    """thought here was to provide whatever channel you have set as an output also to be an input

    Killed this in favor of setting channels in condarc in the docker image.
    """
    configurations = load_yaml_config_dir(upload_config_dir)
    channels = channels or []

    for config in configurations:
        if 'token' in config:
            channels.append(config['user'])
        elif 'server' in config:
            channels.append(parse.urljoin('http://' + config['server'],
                                          config['destination_path'].format(subdir=subdir)))
        else:
            channels.append(config['channel'])
    return channels
Example #12
Source File: session.py From hfut with MIT License
def prepare_request(self, request):
    parsed = parse.urlparse(request.url)

    # Check for illegal characters in request arguments
    if ENV['REQUEST_ARGUMENTS_CHECK'] and (not parsed.netloc or parsed.netloc == parse.urlparse(self.host).netloc):
        for k, v in reduce(lambda x, y: x + list(y.items()), (request.params, request.data), []):
            pattern = ENV['ILLEGAL_CHARACTERS_PATTERN']
            result = pattern.search(str(k)) or pattern.search(str(v))
            if result:
                # Message reads "Illegal character in parameters: "
                msg = ''.join(['参数中出现非法字符: ', result.group()])
                raise ValidationError(msg)

    if not parsed.netloc:
        # requests parses the url during preparation, so the url must be
        # replaced with a complete address before preparing, see
        # requests.models.PreparedRequest#prepare_url
        request.url = parse.urljoin(self.host, request.url)
    return super(BaseSession, self).prepare_request(request)
Example #13
Source File: interface.py From pulsar with Apache License 2.0
def __init__(self, destination_params, transport):
    self.transport = transport
    remote_host = destination_params.get("url")
    assert remote_host is not None, "Failed to determine url for Pulsar client."
    if not remote_host.startswith("http"):
        remote_host = "http://%s" % remote_host
    manager = destination_params.get("manager", None)
    if manager:
        if "/managers/" in remote_host:
            log.warning("Ignoring manager tag '%s', Pulsar client URL already contains a \"/managers/\" path." % manager)
        else:
            remote_host = urljoin(remote_host, "managers/%s" % manager)
    if not remote_host.endswith("/"):
        remote_host = "%s/" % remote_host
    self.remote_host = remote_host
    self.private_token = destination_params.get("private_token", None)
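The trailing-slash normalization at the end is what makes later urljoin() calls against self.remote_host append to the path instead of replacing its last segment. A quick sketch (hostnames are made up):

from six.moves.urllib.parse import urljoin

urljoin('http://pulsar.example/base', 'jobs')   # -> 'http://pulsar.example/jobs' (segment replaced)
urljoin('http://pulsar.example/base/', 'jobs')  # -> 'http://pulsar.example/base/jobs' (appended)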
Example #14
Source File: stockfighter.py From stockfighter with ISC License
def place_new_order(self, stock, price, qty, direction, order_type):
    """Place an order for a stock.

    https://starfighter.readme.io/docs/place-new-order
    """
    url_fragment = 'venues/{venue}/stocks/{stock}/orders'.format(
        venue=self.venue,
        stock=stock,
    )
    data = {
        "stock": stock,
        "price": price,
        "venue": self.venue,
        "account": self.account,
        "qty": qty,
        "direction": direction,
        "orderType": order_type,
    }
    url = urljoin(self.base_url, url_fragment)
    resp = self.session.post(url, json=data)
    return resp.json()
Example #15
Source File: sentinel.py From sentinelsat with GNU General Public License v3.0
def is_online(self, id):
    """Returns whether a product is online

    Parameters
    ----------
    id : string
        UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'

    Returns
    -------
    bool
        True if online, False if in LTA
    """
    # Check https://scihub.copernicus.eu/userguide/ODataAPI#Products_entity for more information
    url = urljoin(self.api_url, "odata/v1/Products('{}')/Online/$value".format(id))
    r = self.session.get(url, auth=self.session.auth, timeout=self.timeout)
    _check_scihub_response(r)

    return r.json()
Example #16
Source File: form.py From learn_python3_spider with MIT License
def _get_form_url(form, url):
    if url is None:
        action = form.get('action')
        if action is None:
            return form.base_url
        return urljoin(form.base_url, strip_html5_whitespace(action))
    return urljoin(form.base_url, url)
Example #17
Source File: template_utils.py From eclcli with Apache License 2.0
def get_file_contents(from_data, files, base_url=None,
                      ignore_if=None, recurse_if=None,
                      is_object=False, object_request=None):

    if recurse_if and recurse_if(from_data):
        if isinstance(from_data, dict):
            recurse_data = six.itervalues(from_data)
        else:
            recurse_data = from_data
        for value in recurse_data:
            get_file_contents(value, files, base_url, ignore_if,
                              recurse_if, is_object, object_request)

    if isinstance(from_data, dict):
        for key, value in six.iteritems(from_data):
            if ignore_if and ignore_if(key, value):
                continue

            if base_url and not base_url.endswith('/'):
                base_url = base_url + '/'

            str_url = parse.urljoin(base_url, value)
            if str_url not in files:
                if is_object and object_request:
                    file_content = object_request('GET', str_url)
                else:
                    file_content = utils.read_url_content(str_url)
                if is_template(file_content):
                    if is_object:
                        template = get_template_contents(
                            template_object=str_url, files=files,
                            object_request=object_request)[1]
                    else:
                        template = get_template_contents(
                            template_url=str_url, files=files)[1]
                    file_content = jsonutils.dumps(template)
                files[str_url] = file_content
            # replace the data value with the normalised absolute URL
            from_data[key] = str_url
Example #18
Source File: utils.py From eclcli with Apache License 2.0
def normalise_file_path_to_url(path):
    if parse.urlparse(path).scheme:
        return path
    path = os.path.abspath(path)
    return parse.urljoin('file:', request.pathname2url(path))
Example #19
Source File: utils.py From eclcli with Apache License 2.0
def base_url_for_url(url):
    parsed = parse.urlparse(url)
    parsed_dir = os.path.dirname(parsed.path)
    return parse.urljoin(url, parsed_dir)
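In effect, this returns the 'directory' containing the given URL: joining the root-relative directory path against the original URL keeps its scheme and host. A hedged walk-through (the URL is invented):

from six.moves.urllib.parse import urljoin, urlparse
import os.path

url = 'http://example.com/templates/stack.yaml'
parsed = urlparse(url)                     # parsed.path == '/templates/stack.yaml'
parsed_dir = os.path.dirname(parsed.path)  # '/templates'
urljoin(url, parsed_dir)                   # -> 'http://example.com/templates'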
Example #20
Source File: utils.py From eclcli with Apache License 2.0
def resolve_param_get_file(file, base_url):
    if base_url and not base_url.endswith('/'):
        base_url = base_url + '/'
    str_url = parse.urljoin(base_url, file)
    return read_url_content(str_url)
Example #21
Source File: sentinel.py From sentinelsat with GNU General Public License v3.0
def get_product_odata(self, id, full=False):
    """Access OData API to get info about a product.

    Returns a dict containing the id, title, size, md5sum, date, footprint and download url
    of the product. The date field corresponds to the Start ContentDate value.

    If `full` is set to True, then the full, detailed metadata of the product is returned
    in addition to the above.

    Parameters
    ----------
    id : string
        The UUID of the product to query
    full : bool
        Whether to get the full metadata for the Product. False by default.

    Returns
    -------
    dict[str, Any]
        A dictionary with an item for each metadata attribute

    Notes
    -----
    For a full list of mappings between the OpenSearch (Solr) and OData attribute names
    see the following definition files:
    https://github.com/SentinelDataHub/DataHubSystem/blob/master/addon/sentinel-1/src/main/resources/META-INF/sentinel-1.owl
    https://github.com/SentinelDataHub/DataHubSystem/blob/master/addon/sentinel-2/src/main/resources/META-INF/sentinel-2.owl
    https://github.com/SentinelDataHub/DataHubSystem/blob/master/addon/sentinel-3/src/main/resources/META-INF/sentinel-3.owl
    """
    url = urljoin(self.api_url, "odata/v1/Products('{}')?$format=json".format(id))
    if full:
        url += "&$expand=Attributes"
    response = self.session.get(url, auth=self.session.auth, timeout=self.timeout)
    _check_scihub_response(response)
    values = _parse_odata_response(response.json()["d"])
    return values
Example #22
Source File: sentinel.py From sentinelsat with GNU General Public License v3.0
def _format_url(self, order_by=None, limit=None, offset=0):
    if limit is None:
        limit = self.page_size
    limit = min(limit, self.page_size)
    url = "search?format=json&rows={}".format(limit)
    url += "&start={}".format(offset)
    if order_by:
        url += "&orderby={}".format(order_by)
    return urljoin(self.api_url, url)
Example #23
Source File: serve.py From pipenv with MIT License
def join(self, url, allow_fragments=True):
    return urljoin(self.url, url, allow_fragments=allow_fragments)
Example #24
Source File: allocator_remote.py From universe with MIT License
def _get_request(self, route):
    url = urlparse.urljoin(self.base_url, route)
    extra_logger.info("[%s] GET %s", self.label, url)
    resp = self.session.get(url, auth=(self.api_key, ''), timeout=self.request_timeout)
    return self._handle_resp(resp)
Example #25
Source File: allocator_remote.py From universe with MIT License
def _delete_request(self, route):
    url = urlparse.urljoin(self.base_url, route)
    extra_logger.info("[%s] DELETE %s", self.label, url)
    resp = self.session.delete(url, auth=(self.api_key, ''), timeout=self.request_timeout)
    return self._handle_resp(resp)
Example #26
Source File: allocator_remote.py From universe with MIT License
def _post_request(self, route, data, description):
    url = urlparse.urljoin(self.base_url, route)
    extra_logger.info('[%s] %s: POST %s: %s', self.label, description, url, json.dumps(data))
    resp = self.session.post(urlparse.urljoin(self.base_url, route),
                             data=json.dumps(data),
                             auth=(self.api_key, ''),
                             timeout=self.request_timeout,
                             )
    return self._handle_resp(resp)
Example #27
Source File: client.py From osim-rl with MIT License
def _get_request(self, route):
    url = urlparse.urljoin(self.remote_base, route)
    logger.info("GET {}".format(url))
    resp = self.session.get(url)
    return self._parse_server_error_or_raise_for_status(resp)