Python tld.get_tld() Examples
The following are 30 code examples of tld.get_tld(), collected from open-source projects; the source file, project, and license are noted above each example.
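As an orientation before the examples, here is a minimal usage sketch of the call styles that recur below: the plain-string form, the as_object=True form (whose result exposes .subdomain, .domain, .fld and .tld), fail_silently=True to get None instead of an exception, and fix_protocol=True to accept a bare domain without a scheme. The URLs are illustrative only, and the exact string returned by the plain form depends on the installed tld version.

# Minimal sketch, not taken from any of the projects below.
from tld import get_tld
from tld.exceptions import TldDomainNotFound

# String form: the effective TLD of the URL (recent tld releases return
# "co.uk" here; some older releases returned the registered domain instead).
print(get_tld("https://www.example.co.uk/some/path"))

# Object form: exposes the parsed parts used throughout the examples below.
res = get_tld("mail.example.co.uk", as_object=True,
              fail_silently=True, fix_protocol=True)
if res is not None:
    print(res.subdomain, res.domain, res.fld, res.tld)

# Without fail_silently=True, an unrecognised suffix raises an exception.
try:
    get_tld("http://intranet.local")
except TldDomainNotFound:
    print("not a recognised public suffix")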
Example #1
Source File: crawler.py From ITWSV with MIT License

def __init__(self, response: Response, url: str, empty: bool = False):
    """Create a new Page object.

    @type response: Response
    @param response: a requests Response instance.
    @type url: str
    @param url: URL of the Page.
    @type empty: bool
    @param empty: whether the Page is empty (body length == 0)
    """
    self._response = response
    self._url = url
    self._base = None
    self._soup = None
    self._is_empty = empty
    try:
        self._tld = get_tld(url)
    except TldDomainNotFound:
        self._tld = urlparse(url).netloc
Example #2
Source File: buckethunter.py From AttackSurfaceMapper with GNU General Public License v3.0

def passive_query(mswitch, hostx, key):
    keywords = get_tld(hostx.primary_domain, as_object=True, fail_silently=True, fix_protocol=True).domain
    if keywords is None:
        return
    if mswitch is True:
        print("[DEBUG] Keywords : ", keywords)
    par = {'access_token': key, 'keywords': keywords}
    try:
        response = requests.get("https://buckets.grayhatwarfare.com/api/v1/buckets", params=par, timeout=4)
        gwf_api = response.json()
        if gwf_api["buckets_count"] > 0:
            try:
                for bucket in gwf_api["buckets"]:
                    hostx.buckets.append(bucket["bucket"])
            except:
                pass
    except:
        cprint("error", "[*] Error: connecting with GrayHatWarfare API", 1)
Example #3
Source File: AddOneLetter.py From squatm3 with GNU General Public License v3.0

def add_one_letter(self):
    '''
    This function adds the same letter after the correct one
    tesla.com - ttesla.com - teesla.com - tessla.com - teslla.com - teslaa.com
    '''
    url = get_tld(self.url, as_object=True, fix_protocol=True)
    domain = url.domain
    new_urls_with_double_letter = []

    n = 0
    m = len(domain)

    while n < m:
        new_domain = domain[0:n] + domain[n] + domain[n] + domain[n+1:m]
        n = n + 1
        new_urls_with_double_letter.append(new_domain)

    return new_urls_with_double_letter
Example #4
Source File: RemoveOneLetter.py From squatm3 with GNU General Public License v3.0

def remove_letters(self):
    '''
    :return:
    '''
    url = get_tld(self.url, as_object=True, fix_protocol=True)
    domain = url.domain
    new_urls_without_letter = []

    n = 0
    m = len(domain)

    while n < m:
        new_domain = domain[0:n] + domain[n+1:m]
        n = n + 1
        new_urls_without_letter.append(new_domain)

    new_urls_list = list(set(new_urls_without_letter))
    return new_urls_list
Example #5
Source File: HomoglyphAttack2.py From squatm3 with GNU General Public License v3.0

def switch_all_letters(self):
    """
    The following function generates all the possible combinations
    using homoglyphs
    """
    url = get_tld(self.url, as_object=True, fix_protocol=True)
    domain = url.domain
    domains = hg.Homoglyphs().get_combinations(domain)
    a = []
    i = 0
    print("Generated " + str(len(domains)) + " domains\n")
    for domain in domains:
        idna_domain = domain.encode('idna').decode('idna')
        if not a.__contains__(idna_domain):
            a.append(domain.encode('idna').decode('idna'))
        i = i + 1
        print(str(i) + ' out of ' + str(len(domains)) + ' domains: ' + str(len(a)))
    return a
Example #6
Source File: medium.py From mma-dexter with Apache License 2.0

def is_tld_exception(cls, url):
    """
    Test if the url falls within one of the exceptions; this is intended
    to handle instances where get_tld() calls fail to recognise urls
    (eg: .co.tz fails...)
    """
    url_exceptions = [
        'thecitizen.co.tz',
        'dailynews.co.tz',
        'nigeriatoday.ng',
        'nta.ng',
        'nan.ng',
        'leadership.ng',
        'independent.ng',
        'guardian.ng',
        'dailytimes.ng',
        'theinterview.ng',
        'city-press.news24.com'
    ]
    for ex in url_exceptions:
        # check if it exists in the url, with a buffer for [https://www.] characters at the start
        if ex in url[:len(ex)+12]:
            return ex
    return None
Example #7
Source File: hook.py From letsencrypt-rackspace-hook with Apache License 2.0

def delete_txt_record(args):
    """
    Clean up the TXT record when it is no longer needed.

    Keyword arguments:
    args -- passed from letsencrypt.sh
    """
    domain_name = args[0]
    base_domain_name = get_tld("http://{0}".format(domain_name))
    domain = get_domain(base_domain_name)

    # Get the DNS record object(s) for our challenge record(s)
    name = "{0}.{1}".format('_acme-challenge', domain_name)
    dns_records = list(rax_dns.get_record_iterator(domain))
    text_records = [x for x in dns_records if x.type == 'TXT']

    # Delete any matching records we find
    for text_record in text_records:
        if text_record.name == name:
            text_record.delete()

    return True
Example #8
Source File: datasploit.py From datasploit with GNU General Public License v3.0

def auto_select_target(target, output=None):
    """Auto selection logic"""
    print "Target: %s" % target
    try:
        inp = IPAddress(target)
        if inp.is_private() or inp.is_loopback():
            print "Internal IP Detected : Skipping"
            sys.exit()
        else:
            print "Looks like an IP, running ipOsint...\n"
            ipOsint.run(target, output)
    except SystemExit:
        print "exiting"
    except AddrFormatError:
        if re.match('[^@]+@[^@]+\.[^@]+', target):
            print "Looks like an EMAIL, running emailOsint...\n"
            emailOsint.run(target, output)
        elif get_tld(target, fix_protocol=True, fail_silently=True) is not None:
            print "Looks like a DOMAIN, running domainOsint...\n"
            domainOsint.run(target, output)
        else:
            print "Nothing Matched assuming username, running usernameOsint...\n"
            usernameOsint.run(target, output)
    except:
        print "Unknown Error Occured"
Example #9
Source File: get_links3.py From Malicious_Domain_Whois with GNU General Public License v3.0

def judge_a_links():
    global white_list
    global black_list
    global soup_q
    global res_q
    while True:
        try:
            domain, soup = soup_q.get(timeout=50)
        except Queue.Empty:
            break
        mal_urls = []
        for a in soup.find_all('a'):
            try:
                url = a['href']
                url_domain = get_tld(url)
                if url_domain != domain:
                    # collect only links that point outside this site
                    if url_domain in black_list or url_domain not in white_list:
                        # for now, every URL whose domain is not in the white list goes into the
                        # malicious_link table; once that table is complete enough, switch to
                        # using only the black list
                        mal_urls.append((url, url_domain))
            except Exception, e:
                # logger.info(domain + ' GET LINKS WRONG ...')
                continue
        res_q.put([domain, mal_urls])
Example #10
Source File: crawler.py From ITWSV with MIT License

def is_in_scope(self, resource):
    if isinstance(resource, web.Request):
        if self._scope == Scope.FOLDER:
            return resource.url.startswith(self._base.path)
        elif self._scope == Scope.PAGE:
            return resource.path == self._base.path
        elif self._scope == Scope.URL:
            return resource.url == self._base.url
        else:
            # Scope.DOMAIN
            try:
                return get_tld(resource.url) == get_tld(self._base.url)
            except TldDomainNotFound:
                return resource.hostname == self._base.hostname
    else:
        if self._scope == Scope.FOLDER:
            return resource.startswith(self._base.path)
        elif self._scope == Scope.PAGE:
            return resource.split("?")[0] == self._base.path
        elif self._scope == Scope.URL:
            return resource == self._base.url
        else:
            # Scope.DOMAIN
            try:
                return get_tld(resource) == get_tld(self._base.url)
            except TldDomainNotFound:
                return urlparse(resource).netloc == self._base.hostname
Example #11
Source File: crawler.py From ITWSV with MIT License

def is_external_to_domain(self, url: str) -> bool:
    """Returns True if url is under another TLD than the crawled URL, False otherwise.

    @type url: str
    @param url: An absolute URL (with protocol prefix)
    @rtype: bool
    """
    try:
        tld = get_tld(url)
    except TldDomainNotFound:
        # Not yet known TLD or IP address or local hostname
        tld = urlparse(url).netloc
    except TldBadUrl:
        tld = None
        print("bad url", url, "found within", self._url)
    return tld != self._tld
Example #12
Source File: medium.py From mma-dexter with Apache License 2.0

def for_url(cls, url):
    domain = get_tld(url, fail_silently=True)  # fail silently
    if domain is None:
        domain = cls.is_tld_exception(url)
        if domain is None:
            return None

    parts = urlparse(url)
    # iol.co.za/isolezwe
    domain = domain + parts.path

    # explicitly look for city-press, subdomain does not play nice with current Dexter code
    if 'city-press.news24.com' in url:
        medium = Medium.query.get(5)
        return medium
    else:
        # find the medium with the longest matching domain
        for medium in sorted(Medium.query.all(), key=lambda m: len(m.domain or ''), reverse=True):
            if medium.domain and domain.startswith(medium.domain):
                return medium

    return None
Example #13
Source File: hook.py From letsencrypt-cloudflare-hook with MIT License

def _get_zone_id(domain):
    tld = get_tld('http://' + domain)
    url = "https://api.cloudflare.com/client/v4/zones?name={0}".format(tld)
    r = requests.get(url, headers=CF_HEADERS)
    r.raise_for_status()
    return r.json()['result'][0]['id']

# https://api.cloudflare.com/#dns-records-for-a-zone-dns-record-details
Example #14
Source File: domain.py From memex-explorer with BSD 2-Clause "Simplified" License

def extract_tld(self, url):
    try:
        return get_tld(url)
    except:
        traceback.print_exc()
        print "\n\nInvalid url: %s" % url
        return url
Example #15
Source File: get_links.py From Malicious_Domain_Whois with GNU General Public License v3.0

def get_a_links(url, white_list):
    try:
        source_domain = get_tld(url)
    except Exception, e:
        # print str(e)
        return {}
Example #16
Source File: commons.py From Analyst-Arsenal with GNU General Public License v3.0

def score_domain(config, domain, args):
    """ """
    score = 0

    for t in config["tlds"]:
        if domain.endswith(t):
            score += 20

    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        if res is not None:
            domain = '.'.join([res.subdomain, res.domain])
    except Exception as err:
        message_failed(args, err, domain)
        pass

    score += int(round(entropy.shannon_entropy(domain)*50))

    domain = unconfuse(domain)
    words_in_domain = re.split(r"\W+", domain)

    if words_in_domain[0] in ["com", "net", "org"]:
        score += 10

    for word in config["keywords"]:
        if word in domain:
            score += config["keywords"][word]

    for key in [k for (k, s) in config["keywords"].items() if s >= 70]:
        for word in [w for w in words_in_domain if w not in ["email", "mail", "cloud"]]:
            if distance(str(word), str(key)) == 1:
                score += 70

    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    if domain.count(".") >= 3:
        score += domain.count(".") * 3

    return score
Example #17
Source File: get_domain.py From Malicious_Domain_Whois with GNU General Public License v3.0

def get_domain():
    global url_q
    while True:
        if url_q.empty():
            break
        url = url_q.get()
        try:
            domain = str(get_tld(url))
        except:
            logger.info(url + 'get domian wrong ...')
            continue
        res_q.put([url, domain])
    print 'get domains over ...'
Example #18
Source File: bounty-monitor.py From bounty-monitor with MIT License

def update_subdomain(subdomain, alive):
    """Subdomain database is maintained locally to keep track of identified live and known subdomains."""
    tld = get_tld(subdomain, as_object=True, fail_silently=True, fix_protocol=True)
    try:
        # synchronize multithread DB_CURSOR.execute
        LOCK.acquire(True)
        if alive == "N":
            DB_CURSOR.execute("insert into subdomains(subdomain, domain, first_found, alive, source) values(?, ?, ?, ?, ?)",
                              (subdomain, tld.tld, datetime.now(), 0, "BountyMonitor"))
            CONNECTION.commit()
        elif alive == "Y":
            DB_CURSOR.execute("update subdomains set alive=1 where subdomain = ?", (subdomain, ))
            CONNECTION.commit()
    finally:
        LOCK.release()
Example #19
Source File: bounty-monitor.py From bounty-monitor with MIT License

def monitor(message, context):
    """certstream events callback handler"""
    all_domains = ""
    if message['message_type'] == "heartbeat":
        return
    if message["message_type"] == "certificate_update":
        all_domains = message["data"]["leaf_cert"]["all_domains"]
        for domain in set(all_domains):
            PBAR.update(1)
            # all magic happens here
            try:
                if domain.count(".") > 1 and not domain.startswith("*.") and not re.search("\d$", domain) and "cloudflaressl" not in domain and "xn--" not in domain and not domain.endswith("local"):
                    tld = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
                    if tld is not None and tld.tld in BOUNTY_LIST and tld.tld != domain and tld.subdomain != "www":
                        if check_subdomain_not_known_in_db(domain):
                            update_subdomain(domain, "N")
                            MONITOR_QUEUE.put(domain)
            except Exception as e:
                logging.exception("message")
                print(domain)
    t.sleep(.1)
Example #20
Source File: Flipper.py From squatm3 with GNU General Public License v3.0

def flip_letters(self):
    '''
    The following function
    '''
    url = get_tld(self.url, as_object=True, fix_protocol=True)
    domain = url.domain
    new_urls_without_letter = []

    n = 0
    m = len(domain)

    if m == 1:
        new_urls_without_letter.append(domain)
    elif m == 2:
        new_domain = domain[1] + domain[0]
        new_urls_without_letter.append(new_domain)
    else:
        while n < m and m > 2:
            if n == 0:
                new_domain = domain[n + 1] + domain[n] + domain[n + 2:m]
            elif n == 1:
                new_domain = domain[0] + domain[n + 1] + domain[n] + domain[n + 2:m]
            elif 1 < n < m - 1:
                new_domain = domain[0:n] + domain[n + 1] + domain[n] + domain[n + 2:m]
            n = n + 1
            new_urls_without_letter.append(new_domain)

    new_urls_list = list(set(new_urls_without_letter))
    return new_urls_list
Example #21
Source File: dns_oa.py From incubator-spot with Apache License 2.0

def _add_tld_column(self):
    qry_name_col = self._conf['dns_results_fields']['dns_qry_name']
    self._dns_scores = [conn + [
        get_tld("http://" + str(conn[qry_name_col]), fail_silently=True)
        if "http://" not in str(conn[qry_name_col])
        else get_tld(str(conn[qry_name_col]), fail_silently=True)
    ] for conn in self._dns_scores]
Example #22
Source File: hook.py From letsencrypt-rackspace-hook with Apache License 2.0

def get_domain(domain_name):
    """
    Query the Rackspace DNS API to get a domain object for the domain name.

    Keyword arguments:
    domain_name -- the domain name that needs a challenge record
    """
    base_domain_name = get_tld("http://{0}".format(domain_name))
    domain = rax_dns.find(name=base_domain_name)
    return domain
Example #23
Source File: wpad.py From pypac with Apache License 2.0

def proxy_urls_from_dns(local_hostname=None):
    """
    Generate URLs from which to look for a PAC file, based on a hostname.
    Fully-qualified hostnames are checked against the Mozilla Public Suffix List
    to ensure that generated URLs don't go outside the scope of the organization.
    If the fully-qualified hostname doesn't have a recognized TLD,
    such as in the case of intranets with '.local' or '.internal',
    the TLD is assumed to be the part following the rightmost dot.

    :param str local_hostname: Hostname to use for generating the WPAD URLs.
        If not provided, the local hostname is used.
    :return: PAC URLs to try in order, according to the WPAD protocol.
        If the hostname isn't qualified or is otherwise invalid, an empty list is returned.
    :rtype: list[str]
    """
    if not local_hostname:
        local_hostname = socket.getfqdn()
    if '.' not in local_hostname or len(local_hostname) < 3 or \
            local_hostname.startswith('.') or local_hostname.endswith('.'):
        return []
    try:
        parsed = get_tld('http://' + local_hostname, as_object=True)
        subdomain, tld = parsed.subdomain, parsed.fld
    except TldDomainNotFound:
        final_dot_index = local_hostname.rfind('.')
        subdomain, tld = local_hostname[0:final_dot_index], local_hostname[final_dot_index+1:]
    return wpad_search_urls(subdomain, tld)
Example #24
Source File: get_whois.py From armory with GNU General Public License v3.0

def run(domains):
    whois_domains = {}

    if type(domains) == str:
        domains = [domains]

    for domain in domains:
        tld = get_tld("blah://%s" % domain)

        if whois_domains.get(tld, False):
            whois_domains[tld]["subdomains"].append(domain.lower())
        else:
            whois_domains[tld] = {"subdomains": [domain.lower()]}

    return whois_domains
Example #25
Source File: utils.py From Photon with GNU General Public License v3.0

def top_level(url, fix_protocol=True):
    """Extract the top level domain from an URL."""
    ext = tld.get_tld(url, fix_protocol=fix_protocol)
    toplevel = '.'.join(urlparse(url).netloc.split('.')[-2:]).split(ext)[0] + ext
    return toplevel
Example #26
Source File: subdomain_brute.py From butian-src-domains with GNU General Public License v3.0

def update_subdomains(update_all=False):
    targets_list = load_all_targets()
    target_with_subdomains = load_target_with_subdomains()
    # targets_left_path = os.path.join(SUBS_DIR, 'targets_left.txt')
    # with open(targets_left_path, 'w') as f:
    #     for target in targets_list:
    #         if target['domain'] not in target_with_subdomains:
    #             f.write(json.dumps(target, ensure_ascii=False)+'\n')
    update_count = 0
    all_count = len(targets_list)
    with open(target_with_subdomains_path, 'a') as f:
        for target in targets_list:
            try:
                update_count += 1
                # get domain like: get aiyo.xyz from www.aiyo.xyz
                target_domain = target['domain']
                if not update_all and target_domain in target_with_subdomains:
                    # logger.warning('Already brute subdomain for {}, skip'.format(target_domain))
                    continue
                if not target_domain.startswith('http'):
                    target_domain = 'http://' + target_domain
                res = get_tld(target_domain, as_object=True, fail_silently=True)
                if res and res.fld:
                    subdomains = get_subdomains(res.fld)
                    if subdomains == None:
                        continue
                    target['subdomains'] = subdomains
                    # append new result
                    f.write(json.dumps(target, ensure_ascii=False)+'\n')
                    logger.info('add subdomains result for {} success, progress {}/{}'.format(target_domain, update_count, all_count))
            except Exception:
                logger.error('Unexpected error occured when update subdomains for {}'.format(target_domain))
Example #27
Source File: phishing_catcher.py From OSweep with MIT License

def score_domain(provided_ioc):
    """Return the scores of the provided domain."""
    score = 0

    for suspicious_tld in suspicious["tlds"]:
        if provided_ioc.endswith(suspicious_tld):
            score += 20

    try:
        res = tld.get_tld(provided_ioc, as_object=True, fail_silently=True, fix_protocol=True)
        domain = ".".join([res.subdomain, res.domain])
    except Exception:
        domain = provided_ioc

    score += int(round(entropy.shannon_entropy(domain)*50))
    domain = confusables.unconfuse(domain)
    words_in_domain = re.split("\W+", domain)

    if domain.startswith("*."):
        domain = domain[2:]

    if words_in_domain[0] in ["com", "net", "org"]:
        score += 10

    for word in suspicious["keywords"]:
        if word in domain:
            score += suspicious["keywords"][word]

    for key in [k for k, v in suspicious["keywords"].items() if v >= 70]:
        for word in [w for w in words_in_domain if w not in ["email", "mail", "cloud"]]:
            if pylev.levenshtein(str(word), str(key)) == 1:
                score += 70

    if "xn--" not in domain and domain.count("-") >= 4:
        score += domain.count("-") * 3

    if domain.count(".") >= 3:
        score += domain.count(".") * 3

    return score
Example #28
Source File: HomoglyphAttack.py From squatm3 with GNU General Public License v3.0

def switch_all_letters(self):
    """
    The following function generates all the possible combinations
    using homoglyphs
    """
    domains = []
    url = get_tld(self.url, as_object=True, fix_protocol=True)
    domain = url.domain
    a = []
    j = 0
    glyphs = self.dictionary

    result1 = set()
    for ws in range(1, len(domain)):
        for i in range(0, (len(domain)-ws)+1):
            win = domain[i:i+ws]
            j = 0
            while j < ws:
                c = win[j]
                if c in glyphs:
                    win_copy = win
                    for g in glyphs[c]:
                        win = win.replace(c, g)
                        result1.add(domain[:i] + win + domain[i+ws:])
                        win = win_copy
                j += 1

    result2 = set()
    for domain in result1:
        for ws in range(1, len(domain)):
            for i in range(0, (len(domain)-ws)+1):
                win = domain[i:i+ws]
                j = 0
                while j < ws:
                    c = win[j]
                    if c in glyphs:
                        win_copy = win
                        for g in glyphs[c]:
                            win = win.replace(c, g)
                            result2.add(domain[:i] + win + domain[i+ws:])
                            win = win_copy
                    j += 1

    return list(result1 | result2)
Example #29
Source File: catch_phishing.py From phishing_catcher with GNU General Public License v3.0

def score_domain(domain):
    """Score `domain`.

    The higher the score, the more likely `domain` is a phishing site.

    Args:
        domain (str): the domain to check.

    Returns:
        int: the score of `domain`.
    """
    score = 0
    for t in suspicious['tlds']:
        if domain.endswith(t):
            score += 20

    # Remove initial '*.' for wildcard certificates bug
    if domain.startswith('*.'):
        domain = domain[2:]

    # Removing TLD to catch inner TLD in subdomain (ie. paypal.com.domain.com)
    try:
        res = get_tld(domain, as_object=True, fail_silently=True, fix_protocol=True)
        domain = '.'.join([res.subdomain, res.domain])
    except Exception:
        pass

    # Higher entropy is kind of suspicious
    score += int(round(entropy(domain)*10))

    # Remove lookalike characters using list from http://www.unicode.org/reports/tr39
    domain = unconfuse(domain)

    words_in_domain = re.split("\W+", domain)

    # ie. detect fake .com (ie. *.com-account-management.info)
    if words_in_domain[0] in ['com', 'net', 'org']:
        score += 10

    # Testing keywords
    for word in suspicious['keywords']:
        if word in domain:
            score += suspicious['keywords'][word]

    # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol)
    for key in [k for (k, s) in suspicious['keywords'].items() if s >= 70]:
        # Removing too generic keywords (ie. mail.domain.com)
        for word in [w for w in words_in_domain if w not in ['email', 'mail', 'cloud']]:
            if distance(str(word), str(key)) == 1:
                score += 70

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += domain.count('-') * 3

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count('.') >= 3:
        score += domain.count('.') * 3

    return score
Example #30
Source File: subdom.py From Vaile with GNU General Public License v3.0

def subdombrute(web):
    try:
        print(GR+' [*] Importing wordlist path to be bruteforced... "files/subdomains.lst"')
        with open('files/subdomains.lst', 'r') as lol:
            for path in lol:
                a = path.replace("\n", "")
                sublist.append(a)
    except IOError:
        print(R+' [-] Wordlist not found!')

    global found
    if 'http://' in web:
        web = web.replace('http://', '')
    elif 'https://' in web:
        web = web.replace('https://', '')
    else:
        pass
    web = 'http://' + web
    tld0 = get_tld(web, as_object=True)

    if len(sublist) > 0:
        for m in sublist:
            furl = str(m) + '.' + str(tld0)
            flist.append(furl)

    if flist:
        time.sleep(0.5)
        print(R+'\n B R U T E F O R C E R')
        print(R+' =======================\n')
        print(GR+' [*] Bruteforcing for possible subdomains...')
        for url in flist:
            if 'http://' in url:
                url = url.replace('http://', '')
            elif 'https://' in url:
                url = url.replace('https://', '')
            else:
                pass
            try:
                ip = socket.gethostbyname(url)
                print(G+'\n [!] Subdomain Found : '+O+url+P+' ['+str(ip)+']')
                found.append(url)
            except:
                sys.stdout.write(B+'\r [*] Checking : '+C+url)
                sys.stdout.flush()

    return found