from libs.ext_mngr import requests, re, time, urljoin, IPNetwork, IPAddress
from libs.var_mngr import list_2d_1d
from tldextract import extract
#
# Build candidate URLs for a bare host: one per schema/port combination.
def edit_link(url, schema, ports):
    if len(url) < 2:
        return []
    lst = []
    if not ports:
        ports = [""]
    if "://" not in url:
        for p in ports:
            lst.extend([sch + "://" + url + ":" + p if p else sch + "://" + url for sch in schema])
    if not lst:
        lst = [url]
    return lst
#
# Return the full site name for a URL: the bare IP when the host is an
# address, subdomain.domain.suffix otherwise; 0 when no usable host is found.
def site_finder(url):
    url = extract(url)
    if is_ip(url[1]):
        return url[1]
    if url[0] and url[0] not in ["*", " "]:
        return url[0] + "." + url[1] + "." + url[2]
    elif url[1] and url[2]:
        return url[1] + "." + url[2]
    return 0
#
# Wildcard-DNS detection: resolve deliberately bogus subdomains; if most of
# them resolve, the server answers for everything and results are fake.
# subdom_resolver is defined elsewhere in the project.
def evil_dns_res(domain):
    fake = 0
    evil_list = ["dr4k0v3r", "dr4k0v3r_p3n73571ng", "dr4k0v3r_c0mp4ny", "dr4k0v3r.Compney",
                 "dr4k0v3r.fake.dns", "dr4k0v3r.kickass.", "dr4k0v3r.was_here", "dr4k0v3r.was_here.dns"]
    for sub in evil_list:
        subdom = subdom_resolver(sub + "." + domain, 4, 0.1)
        if subdom:
            fake += 1
    if fake > 0.5 * len(evil_list):
        return 1
    return 0
#
# Catch-all page detection: request the same bogus subdomains over HTTP; if
# most of them answer 200, every page "exists" and scan results are fake.
# schemas_ports is a flat list alternating schema lists and port lists.
def evil_page_res(domain, request, schemas_ports):
    request_func = getattr(requests, request)  # safer than eval("requests." + request)
    fake = 0
    evil_list = ["dr4k0v3r", "dr4k0v3r_p3n73571ng", "dr4k0v3r_c0mp4ny", "dr4k0v3r.Compney",
                 "dr4k0v3r.fake.dns", "dr4k0v3r.was_here"]
    # Expand every fake subdomain into full URLs for each schema/ports pair.
    evil_list = list_2d_1d([list_2d_1d([edit_link(sub + "." + domain, schemas_ports[i], schemas_ports[i + 1])
                                        for i in range(0, len(schemas_ports), 2)]) for sub in evil_list])
    for sub in evil_list:
        for i in range(4):  # up to 4 attempts per URL
            try:
                if request_func(sub, timeout=1).status_code == 200:
                    fake += 1
                    break
            except KeyboardInterrupt:
                return None
            except Exception:
                pass
            time.sleep(0.1)
    ports_total = sum(len(schemas_ports[sp]) for sp in range(1, len(schemas_ports), 2))
    if fake > 0.5 * len(evil_list) / max(ports_total, 1):  # guard against empty port lists
        return 1
    return 0
#
# Resolve a (possibly relative) link against its page URL; returns the
# absolute URL, or 0 for anchors/empty links or an invalid combination.
def correct_url(url, link="/"):
    if link not in [None, "#", "", "/"]:
        rlink = urljoin(url, link)
        if rlink and is_valid_url(rlink):
            return rlink
        elif rlink:
            print("failed from", url, "to", link, "\ncombination", rlink)
    return 0
#
# Membership test that ignores a trailing-slash difference.
def url_in_lst(url, lst):
    if url:
        url2 = url[:-1] if url[-1] == "/" else url + "/"
        if url in lst or url2 in lst:
            return 1
    return 0
#
# Follow HTTP 3xx redirects by hand (up to max_redirect_follow hops) and
# return the final URL if it answers 200, otherwise "".
def url_redirect_follower(url, request_type="get", request_timeout=1, max_redirect_follow=10):
    rlink = ""
    rlist = []
    try:
        request_func = requests.head if request_type == "head" else requests.get
        # allow_redirects=False so requests does not silently follow for us
        rsp = request_func(url, timeout=request_timeout, allow_redirects=False)
        while rsp.status_code in range(300, 400) and max_redirect_follow > 0:
            location = rsp.headers.get('Location')
            if not location:
                break
            # Location may be relative; resolve it against the current URL.
            rsp = request_func(urljoin(rsp.url, location), timeout=request_timeout, allow_redirects=False)
            if rsp.url in rlist:  # redirect loop detected
                break
            rlist.append(rsp.url)
            max_redirect_follow -= 1
            print(max_redirect_follow, rsp.url, rsp.status_code)
        else:
            if rsp.status_code == 200:
                rlink = rsp.url
    except Exception:
        pass
    return rlink
#
# Check whether a URL serves the given content type (e.g. "text/html").
def is_text_page(p_type, rlink):
    try:
        if rlink and p_type in requests.head(rlink, timeout=60).headers.get('Content-Type', ''):
            return 1
        return 0
    except Exception:
        return 0
#
# 1 if the string parses as an IP address, else 0.
def is_ip(ip):
    try:
        IPAddress(ip)
        return 1
    except Exception:
        return 0
#
# Expand an integer span into dotted-quad strings, from the first IP to the last.
def ip_range_to_list(frst, lst):
    return [str(IPAddress(c)) for c in range(frst, lst + 1)]  # + 1 so the last address is included
#
# Convert one or two IPs/CIDR ranges into the [first, last] integer span covering both.
def ip_range_to_int(ip1, ip2=""):
    if not ip2:
        ip2 = ip1
    if ip1 and is_ip(ip1.split("/")[0]) and ip2 and is_ip(ip2.split("/")[0]):
        frst = min(IPNetwork(ip1).first, IPNetwork(ip2).first)
        lst = max(IPNetwork(ip1).last, IPNetwork(ip2).last)
        return [frst, lst]
    print("failed on", ip1, ip2)
#
def is_mail(url):
    if url and ("mailto:" in url or "@" in url):
        return 1
    return 0
#
def is_javascript(url):
    if url and "javascript:" in url:
        return 1
    return 0
#
def is_tel(url):
    if url and "tel:" in url:
        return 1
    return 0
#
# Validate a URL: scheme, host (domain, localhost or IPv4), optional port, optional path.
def is_valid_url(url):
    regex = re.compile(
        r'^(?:https?|ftp|file)://'  # http://, https://, ftp:// or file://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'  # domain...
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
    if url is not None and regex.search(url):
        return 1
    return 0
#
# Prefix http:// when the URL has no valid schema.
def schema_based_url(url):
    if not is_valid_url(url):
        url = "http://" + url
    return url
#
# Check whether a site is reachable: IPs are assumed online, host names must resolve.
def is_online(url):
    site = site_finder(url)
    if is_ip(site):
        return 1
    elif site:
        site = subdom_resolver(site, 5, 0.1)
    return site
#
# Return [ip] or (domain, suffix) for a URL, 0 when nothing usable is found.
def dom_finder(url):
    if url:
        try:
            url = extract(url)[:]
            if is_ip(url[1]):
                return [url[1]]
            if url[1] and url[2]:  # and not " " in url[0]
                return url[1:]
        except Exception:
            pass
    return 0
#
def is_domain(url):
    d = extract(url)
    if d[1] and d[2]:
        return 1
    return 0
#
# Detect whether any failure sentence (Arabic / English) is found in the page
# source. TODO: needs an update to change the language when using a proxy.
def pattern_in_page(src, sentence):
    if any(i in src for i in sentence):
        return 1
    return 0
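#
# Minimal usage sketch (not part of the original module): shows how a few of
# the helpers fit together; "example.com" and the schema/port values below
# are placeholders, not values the project itself uses.
if __name__ == "__main__":
    # One URL per schema/port combination for a bare host.
    print(edit_link("example.com", ["http", "https"], ["80", "8080"]))
    # -> ['http://example.com:80', 'https://example.com:80',
    #     'http://example.com:8080', 'https://example.com:8080']
    print(is_valid_url("https://example.com:8080/index.html"))  # -> 1
    print(schema_based_url("example.com"))  # -> 'http://example.com'
    print(dom_finder("https://www.example.com/page"))  # -> ('example', 'com')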