Python urllib.request.url() Examples
The following are 21 code examples involving urllib request URLs (note: the examples below access the `.url` attribute of framework request objects; `urllib.request` itself has no `url()` function).
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module urllib.request, or try the search function.
Example #1
Source File: distributed_scheduler.py From scrapy-cluster with MIT License | 6 votes |
def request_to_dict(self, request):
    """Serialize a scrapy Request object into a plain dict.

    Adapted from scrapy.utils.reqser; callback/errback are stored by name
    because they are assumed to be bound methods of the spider.
    """
    # Resolve a callable to its name, preserving None.
    def _cb_name(cb):
        return None if cb is None else cb.__name__

    return {
        # urls should be safe (safe_string_url)
        'url': to_unicode(request.url),
        'method': request.method,
        'headers': dict(request.headers),
        'body': request.body,
        'cookies': request.cookies,
        'meta': request.meta,
        '_encoding': request._encoding,
        'priority': request.priority,
        'dont_filter': request.dont_filter,
        # callback/errback are assumed to be a bound instance of the spider
        'callback': _cb_name(request.callback),
        'errback': _cb_name(request.errback),
    }
Example #2
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveAlertStats():
    """ Retrieve combined statistics AlertsLastMinute, AlertsLastHour, AlertsLast24Hours """
    # Serve from cache when a cached entry exists (getCache returns False on miss).
    cached = getCache(request.url, "url")
    if cached is not False:
        return jsonify(cached)
    # Cache miss: query Elasticsearch, cache for 13 seconds, then respond.
    stats = formatAlertStats(
        queryAlertStats(checkCommunityIndex(request), getRelevantIndices(2)))
    setCache(request.url, stats, 13, "url")
    app.logger.debug('UNCACHED %s' % str(request.url))
    return jsonify(stats)
Example #3
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveDatasetAlertTypesPerMonth():
    """ Retrieve the attacks / day in the last x days from elasticsearch, split by attack group and return as JSON for the last x months, defaults to last month, if no GET parameter days is given """
    # Serve from cache when possible (getCache returns False on miss).
    cached = getCache(request.url, "url")
    if cached is not False:
        return jsonify(cached)
    days = request.args.get('days')
    if not days:
        # Default: within the last month (max 31 day indices).
        result = formatDatasetAlertTypesPerMonth(
            queryDatasetAlertTypesPerMonth(
                None, checkCommunityIndex(request), getRelevantIndices(32)))
    else:
        # A decimal day count <= 31 narrows the index list; anything else
        # falls back to all indices (0).
        if days.isdecimal() and int(days) <= 31:
            indexDays = int(days) + 1
        else:
            indexDays = 0
        result = formatDatasetAlertTypesPerMonth(
            queryDatasetAlertTypesPerMonth(
                days, checkCommunityIndex(request), getRelevantIndices(indexDays)))
    setCache(request.url, result, 3600, "url")
    return jsonify(result)
Example #4
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveDatasetAlertsPerMonth():
    """ Retrieve the attacks / day in the last x days from elasticsearch and return as JSON for the last months, defaults to last month, if no GET parameter days is given """
    # Serve from cache when possible (getCache returns False on miss).
    cached = getCache(request.url, "url")
    if cached is not False:
        return jsonify(cached)
    days = request.args.get('days')
    if not days:
        # Default: within the last month (max 31 day indices).
        result = formatDatasetAlertsPerMonth(
            queryDatasetAlertsPerMonth(
                None, checkCommunityIndex(request), getRelevantIndices(32)))
    else:
        # A decimal day count <= 31 narrows the index list; otherwise use
        # all indices (0).
        if days.isdecimal() and int(days) <= 31:
            indexDays = int(days) + 1
        else:
            indexDays = 0
        result = formatDatasetAlertsPerMonth(
            queryDatasetAlertsPerMonth(
                days, checkCommunityIndex(request), getRelevantIndices(indexDays)))
    setCache(request.url, result, 600, "url")
    return jsonify(result)
Example #5
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveAlertsJson():
    """ Retrieve last 5 Alerts in JSON without IPs """
    # set cacheItem independent from url parameters, respect community index
    cacheEntry = request.url
    cached = getCache(cacheEntry, "url")
    if cached is not False:
        app.logger.debug('Returning /retrieveAlertsJson from Cache %s' % str(request.remote_addr))
        return jsonify(cached)
    # Cache miss: retrieve last X alerts from Elasticsearch and return JSON
    # formatted with limited alert content.
    numAlerts = 35
    result = formatAlertsJson(
        queryAlertsWithoutIP(numAlerts, checkCommunityIndex(request), getRelevantIndices(2)))
    setCache(cacheEntry, result, 25, "url")
    app.logger.debug('UNCACHED %s' % str(request.url))
    return jsonify(result)
Example #6
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveIPs15m():
    """ Retrieve IPs from the last 15mins from ElasticSearch and return formatted XML or JSON with IPs """
    wantsJson = request.args.get('out') and request.args.get('out') == 'json'
    # getCache returns False on miss.
    cached = getCache(request.url, "url")
    if wantsJson:
        if cached is not False:
            return jsonify(cached)
        result = formatBadIP(
            queryBadIPs(15, checkCommunityIndex(request), getRelevantIndices(2)), 'json')
        setCache(request.url, result, 60, "url")
        return jsonify(result)
    # Default output format is XML.
    if cached is not False:
        return Response(cached, mimetype='text/xml')
    result = formatBadIP(
        queryBadIPs(15, checkCommunityIndex(request), getRelevantIndices(2)), 'xml')
    setCache(request.url, result, 60, "url")
    return Response(result, mimetype='text/xml')
# Routes with JSON output
Example #7
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveIPs():
    """ Retrieve IPs from ElasticSearch and return formatted XML or JSON with IPs """
    wantsJson = request.args.get('out') and request.args.get('out') == 'json'
    # getCache returns False on miss.
    cached = getCache(request.url, "url")
    if wantsJson:
        if cached is not False:
            return jsonify(cached)
        result = formatBadIP(
            queryBadIPs(app.config['BADIPTIMESPAN'], checkCommunityIndex(request),
                        getRelevantIndices(2)), 'json')
        setCache(request.url, result, 60, "url")
        return jsonify(result)
    # Default output format is XML.
    if cached is not False:
        return Response(cached, mimetype='text/xml')
    result = formatBadIP(
        queryBadIPs(app.config['BADIPTIMESPAN'], checkCommunityIndex(request),
                    getRelevantIndices(2)), 'xml')
    setCache(request.url, result, 60, "url")
    return Response(result, mimetype='text/xml')
Example #8
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def querySingleIP():
    """ Retrieve Attack data from index about a single IP """
    # get result from cache (getCache returns False on miss)
    getCacheResult = getCache(request.url, "url")
    if getCacheResult is not False:
        app.logger.debug('Returning /querySingleIP from Cache for %s' % str(request.remote_addr))
        # FIX: the cached branch previously returned Response() without a
        # mimetype (served as text/html) while the fresh branch serves
        # text/xml; use the same content type as the ES branch and the
        # other XML routes (retrieveIPs / retrieveIPs15m).
        return Response(getCacheResult, mimetype='text/xml')
    # query ES
    else:
        returnResult = formatSingleIP(
            queryForSingleIP(app.config['MAXALERTS'], request.args.get('ip'),
                             checkCommunityIndex(request), getRelevantIndices(0)))
        setCache(request.url, returnResult, 60, "url")
        app.logger.debug('Returning /querySingleIP from ES for %s' % str(request.remote_addr))
        return Response(returnResult, mimetype='text/xml')
# Routes with both XML and JSON output
Example #9
Source File: peba.py From PEBA with GNU General Public License v3.0 | 6 votes |
def retrieveAlertsCyber():
    """ Retrieve Alerts from ElasticSearch and return formatted XML with limited alert content """
    # get result from cache (getCache returns False on miss)
    getCacheResult = getCache(request.url, "url")
    if getCacheResult is not False:
        app.logger.debug('Returning /retrieveAlertsCyber from Cache for %s' % str(request.remote_addr))
        # FIX: cached responses were served without an explicit mimetype
        # (defaulting to text/html) while fresh results are text/xml; serve
        # both consistently, matching retrieveIPs()/retrieveIPs15m().
        return Response(getCacheResult, mimetype='text/xml')
    # query ES
    else:
        returnResult = formatAlertsXml(
            queryAlerts(app.config['MAXALERTS'], checkCommunityIndex(request),
                        getRelevantIndices(2)))
        # Very short TTL (1s) — this route is hit frequently.
        setCache(request.url, returnResult, 1, "url")
        app.logger.debug('Returning /retrieveAlertsCyber from ES for %s' % str(request.remote_addr))
        return Response(returnResult, mimetype='text/xml')
Example #10
Source File: peba.py From PEBA with GNU General Public License v3.0 | 5 votes |
def retrieveAlertsCount(): """ Retrieve number of alerts in timeframe (GET-Parameter time as decimal or "day") """ # Retrieve Number of Alerts from ElasticSearch and return as xml / json if not request.args.get('time'): app.logger.error('No time GET-parameter supplied in retrieveAlertsCount. Must be decimal number (in minutes) or string "day"') return app.config['DEFAULTRESPONSE'] else: if request.args.get('out') and request.args.get('out') == 'json': # get result from cache getCacheResult = getCache(request.url, "url") if getCacheResult is not False: return jsonify(getCacheResult) else: if request.args.get('time').isdecimal() and int(request.args.get('time')) <= 46080: indexDays=(int(int(request.args.get('time'))/1440))+2 elif request.args.get('time') == "day": indexDays=1 else: indexDays=0 returnResult = formatAlertsCount(queryAlertsCount(request.args.get('time'), checkCommunityIndex(request), getRelevantIndices(indexDays)), 'json') setCache(request.url, returnResult, 60, "url") return jsonify(returnResult) else: # get result from cache getCacheResult = getCache(request.url, "url") if getCacheResult is not False: return Response(getCacheResult, mimetype='text/xml') else: if request.args.get('time').isdecimal() and int(request.args.get('time')) <= 46080: indexDays=(int(int(request.args.get('time'))/1440))+2 elif request.args.get('time') == "day": indexDays=1 else: indexDays=0 returnResult = formatAlertsCount(queryAlertsCount(request.args.get('time'), checkCommunityIndex(request), getRelevantIndices(indexDays)), 'xml') setCache(request.url, returnResult, 60, "url") return Response(returnResult, mimetype='text/xml')
Example #11
Source File: app.py From fb-feed-gen with GNU General Public License v2.0 | 5 votes |
def generate_feed():
    """Build an Atom feed of a user's FB posts from the ?username= query parameter."""
    # app.logger.warning(request.args)
    param = request.args.get('username')
    if not param:
        return 'No username provided in query string'
    username = urllib.parse.unquote(param).strip()
    match, display = fetch.is_valid_username(username)
    if not match:
        return 'Invalid username provided'
    # get posts
    site_url = fetch.build_site_url(username)
    data = fetch.get_remote_data(site_url)
    items = fetch.extract_items(username, data)
    if not items:
        return 'No posts found. Are you sure you put in the correct username?'
    # create feed
    feed = AtomFeed('{0} FB Posts'.format(display),
                    subtitle=site_url,
                    feed_url=request.url,
                    url=request.url_root)
    for post in items:
        feed.add(post['title'], post['article'],
                 content_type='html',
                 author=post['author'],
                 url=post['url'],
                 updated=post['date'],
                 published=post['date'])
    return feed.get_response()
# launch
Example #12
Source File: web.py From mailur with GNU General Public License v3.0 | 5 votes |
def proxy_by_nginx(url):
    """Hand the request off to nginx's internal proxy location via X-Accel-Redirect."""
    # NOTE(review): url is interpolated unquoted into the redirect target —
    # presumably nginx tolerates this; confirm callers pre-encode if needed.
    target = '/.proxy?url=%s' % url
    response.set_header('X-Accel-Redirect', target)
    return ''
Example #13
Source File: web.py From mailur with GNU General Public License v3.0 | 5 votes |
def redirect(url, code=None):
    """Redirect to *url* (resolved against the current request URL).

    Defaults to 303 for HTTP/1.1 clients and 302 otherwise, per the
    HTTP semantics for post-request redirects.
    """
    if not code:
        code = 303 if request.get('SERVER_PROTOCOL') == 'HTTP/1.1' else 302
    response.status = code
    response.body = ''
    response.set_header('Location', urllib.parse.urljoin(request.url, url))
    return response
Example #14
Source File: web.py From mailur with GNU General Public License v3.0 | 5 votes |
def proxy():
    """Proxy the URL given in the ?url= query parameter; 400 when missing."""
    target = request.query.get('url')
    if not target:
        return abort(400)
    return proxy_by_nginx(target)
Example #15
Source File: web.py From mailur with GNU General Public License v3.0 | 5 votes |
def avatars():
    """Return a CSS stylesheet embedding avatar images as base64 data URIs.

    Query params: hashes (comma-separated), size, default (gravatar style),
    cls (CSS selector template with one %s slot for the hash).
    """
    hashes = set(request.query['hashes'].split(','))
    size = request.query.get('size', 20)
    default = request.query.get('default', 'identicon')
    cls = request.query.get('cls', '.pic-%s')
    response.content_type = 'text/css'
    rules = []
    for h, img in fetch_avatars(hashes, size, default):
        rules.append(
            '%s {background-image: url(data:image/gif;base64,%s);}'
            % ((cls % h), img.decode()))
    return '\n'.join(rules)
Example #16
Source File: peba.py From PEBA with GNU General Public License v3.0 | 5 votes |
def retrieveTopCountriesAttacks():
    """ Retrieve the Top X countries and their attacks within month """
    # Serve from cache when possible (getCache returns False on miss).
    cached = getCache(request.url, "url")
    if cached is not False:
        return jsonify(cached)
    # Absent/empty parameters fall back to the query defaults (None).
    offset = request.args.get('monthOffset') or None
    topx = request.args.get('topx') or None
    result = formatTopCountriesAttacks(
        queryTopCountriesAttacks(offset, topx, checkCommunityIndex(request),
                                 getRelevantIndices(0)))
    setCache(request.url, result, 60, "url")
    app.logger.debug('UNCACHED %s' % str(request.url))
    return jsonify(result)
Example #17
Source File: peba.py From PEBA with GNU General Public License v3.0 | 5 votes |
def tpotstats():
    """ Retrieve statistics on tpot community installations. """
    today = str(datetime.date.today()).replace("-", "")
    # Serve from cache when possible (getCache returns False on miss).
    cached = getCache(request.url, "url")
    if cached is not False:
        return jsonify(cached)
    # Absent/empty day parameter falls back to the query default (None = today).
    offset = request.args.get('day') or None
    result = getTPotAlertStatsJson(app, es, getRelevantIndices(0), offset)
    if not result:
        return app.config['DEFAULTRESPONSE']
    # Only cache finished (past) days; today's stats are still changing.
    if request.args.get('day') != today:
        setCache(request.url, result, 60*1440*28, "url")
    return jsonify(result)
Example #18
Source File: http_helpers.py From shavar with Mozilla Public License 2.0 | 5 votes |
def proxy(request, scheme, netloc, timeout=5):
    """Proxies and return the result from the other server.

    - scheme: http or https
    - netloc: proxy location
    """
    # Rebuild the incoming URL on the target scheme/netloc, preserving
    # path, params, query and fragment.
    parts = urlparse(request.url)
    url = urlunparse((scheme, netloc, parts.path, parts.params,
                      parts.query, parts.fragment))
    # Forward only the X- headers...
    xheaders = {name: value for name, value in request.headers.items()
                if name.startswith('X-')}
    # ...plus the originating client address and any captured authorization.
    if 'X-Forwarded-For' not in request.headers:
        xheaders['X-Forwarded-For'] = request.remote_addr
    if hasattr(request, '_authorization'):
        xheaders['Authorization'] = request._authorization
    status, headers, body = get_url(url, request.method, request.body,
                                    timeout=timeout, extra_headers=xheaders)
    return Response(body, status, list(headers.items()))
Example #19
Source File: distributed_scheduler.py From scrapy-cluster with MIT License | 4 votes |
def enqueue_request(self, request):
    '''
    Pushes a request from the spider into the proper throttled queue

    Drops the request when the dupefilter has already seen it (unless
    dont_filter is set), when its appid/crawlid is blacklisted, or when
    the crawl has expired.
    '''
    if not request.dont_filter and self.dupefilter.request_seen(request):
        self.logger.debug("Request not added back to redis")
        return
    req_dict = self.request_to_dict(request)
    if not self.is_blacklisted(req_dict['meta']['appid'],
                               req_dict['meta']['crawlid']):
        # grab the tld of the request
        ex_res = self.extract(req_dict['url'])
        # Per-spider, per-domain queue key for throttling.
        key = "{sid}:{dom}.{suf}:queue".format(
            sid=req_dict['meta']['spiderid'],
            dom=ex_res.domain,
            suf=ex_res.suffix)
        curr_time = time.time()
        domain = "{d}.{s}".format(d=ex_res.domain, s=ex_res.suffix)
        # allow only if we want all requests or we want
        # everything but blacklisted domains
        # insert if crawl never expires (0) or time < expires
        if (self.backlog_blacklist or
                (not self.backlog_blacklist and
                 domain not in self.black_domains)) and \
                (req_dict['meta']['expires'] == 0 or
                 curr_time < req_dict['meta']['expires']):
            # we may already have the queue in memory
            if key in self.queue_keys:
                self.queue_dict[key][0].push(req_dict,
                                             req_dict['meta']['priority'])
            else:
                # shoving into a new redis queue, negative b/c of sorted sets
                # this will populate ourself and other schedulers when
                # they call create_queues
                self.redis_conn.zadd(key, ujson.dumps(req_dict),
                                     -req_dict['meta']['priority'])
            self.logger.debug("Crawlid: '{id}' Appid: '{appid}' added to queue"
                              .format(appid=req_dict['meta']['appid'],
                                      id=req_dict['meta']['crawlid']))
        else:
            self.logger.debug("Crawlid: '{id}' Appid: '{appid}' expired"
                              .format(appid=req_dict['meta']['appid'],
                                      id=req_dict['meta']['crawlid']))
    else:
        self.logger.debug("Crawlid: '{id}' Appid: '{appid}' blacklisted"
                          .format(appid=req_dict['meta']['appid'],
                                  id=req_dict['meta']['crawlid']))
Example #20
Source File: peba.py From PEBA with GNU General Public License v3.0 | 4 votes |
def stats(): """ Retrieve detailed statistics of community installations. """ # get result from cache getCacheResult = getCache(urllib.parse.quote_plus(request.url), "url") if getCacheResult is not False: return jsonify(getCacheResult) else: queryValue = [] if not request.args.get('values'): # Using default : none queryValue=[] else: for i in urllib.parse.unquote_plus(request.args.get('values')).split(','): queryValue.append(i) # check start / end times # gte if not request.args.get('gte'): gte = (datetime.datetime.utcnow()+datetime.timedelta(days=-1)).strftime('%Y-%m-%d %H:%M:%S') app.logger.error("getStats: no gte value given, setting to default now-24h") else: try: datetime.datetime.strptime(urllib.parse.unquote_plus(request.args.get('gte')), '%Y-%m-%d %H:%M:%S') gte = urllib.parse.unquote_plus(request.args.get('gte')) except ValueError: app.logger.debug("getStats: Incorrect date format for gte, falling back to default gte") gte = (datetime.datetime.utcnow() + datetime.timedelta(days=-1)).strftime('%Y-%m-%d %H:%M:%S') # lt if not request.args.get('lt'): lt = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') app.logger.error("getStats: no lt value given, setting to default now()") else: try: datetime.datetime.strptime(urllib.parse.unquote_plus(request.args.get('lt')), '%Y-%m-%d %H:%M:%S') lt = urllib.parse.unquote_plus(request.args.get('lt')) except ValueError: app.logger.debug("getStats: Incorrect date format for lt, falling back to default lt") lt = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') returnResult = getStats(app, es, statisticIndex, gte, lt, queryValue) if not returnResult: return app.config['DEFAULTRESPONSE'] else: setCache(urllib.parse.quote_plus(request.url), returnResult, 60*30, "url") return jsonify(returnResult)
Example #21
Source File: peba.py From PEBA with GNU General Public License v3.0 | 4 votes |
def topx(): """ Retrieve the top x URLs/ports and gather their timeline . """ # get result from cache getCacheResult = getCache(urllib.parse.quote_plus(request.url), "url") if getCacheResult is not False: return jsonify(getCacheResult) else: # get topx if not request.args.get('topx'): topnumber = 10 elif request.args.get('topx').isdecimal() and int(request.args.get('topx')) <= 30: topnumber = request.args.get('topx') else: return app.config['DEFAULTRESPONSE'] # check Type if not request.args.get('type'): return app.config['DEFAULTRESPONSE'] else: if request.args.get('type') in ['destports', 'urls']: toptype = request.args.get('type') else: return app.config['DEFAULTRESPONSE'] # check timespan # days if not request.args.get('days'): days = 1 indices = getRelevantIndices(days + 1) elif request.args.get('days') in ["1", "7", "28"]: days = int(request.args.get('days')) if days == 28: indices = getRelevantIndices(0) else: indices = getRelevantIndices(days + 1) else: return app.config['DEFAULTRESPONSE'] returnResult = getTops(app, es, indices, days, toptype, topnumber) if not returnResult: return app.config['DEFAULTRESPONSE'] else: setCache(urllib.parse.quote_plus(request.url), returnResult, 3600*2, "url") return jsonify(returnResult) # PUT Service