Merge pull request #8 from grandpaul/paulliu-python3

Porting to Python3 (contributed by Ying-Chun Liu (PaulLiu))
This commit is contained in:
Zhiyi
2020-03-06 11:27:41 +08:00
committed by GitHub

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re import re
@@ -6,10 +6,10 @@ import math
import socket import socket
import struct import struct
import pkgutil import pkgutil
import urlparse import urllib.parse
import json import json
import logging import logging
import urllib2 import urllib.request, urllib.error, urllib.parse
from argparse import ArgumentParser from argparse import ArgumentParser
gfwlist_url = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt' gfwlist_url = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
@@ -45,14 +45,14 @@ def ip2long(ip):
def fetch_ip_data(): def fetch_ip_data():
args = parse_args() args = parse_args()
if (args.ip_file): if (args.ip_file):
with open(args.ip_file, 'rb') as f: with open(args.ip_file, 'r') as f:
data = f.read() data = f.read()
else: else:
#fetch data from apnic #fetch data from apnic
print "Fetching data from apnic.net, it might take a few minutes, please wait..." print("Fetching data from apnic.net, it might take a few minutes, please wait...")
url=r'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest' url=r'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest'
# url=r'http://flora/delegated-apnic-latest' #debug # url=r'http://flora/delegated-apnic-latest' #debug
data=urllib2.urlopen(url).read() data=urllib.request.urlopen(url).read().decode('utf-8')
cnregex=re.compile(r'apnic\|cn\|ipv4\|[0-9\.]+\|[0-9]+\|[0-9]+\|a.*',re.IGNORECASE) cnregex=re.compile(r'apnic\|cn\|ipv4\|[0-9\.]+\|[0-9]+\|[0-9]+\|a.*',re.IGNORECASE)
cndata=cnregex.findall(data) cndata=cnregex.findall(data)
@@ -114,7 +114,7 @@ def get_hostname(something):
# quite enough for GFW # quite enough for GFW
if not something.startswith('http:'): if not something.startswith('http:'):
something = 'http://' + something something = 'http://' + something
r = urlparse.urlparse(something) r = urllib.parse.urlparse(something)
return r.hostname return r.hostname
except Exception as e: except Exception as e:
logging.error(e) logging.error(e)
@@ -161,14 +161,14 @@ def parse_gfwlist(gfwlist):
def reduce_domains(domains): def reduce_domains(domains):
# reduce 'www.google.com' to 'google.com' # reduce 'www.google.com' to 'google.com'
# remove invalid domains # remove invalid domains
with open('./tld.txt', 'rb') as f: with open('./tld.txt', 'r') as f:
tld_content = f.read() tld_content = f.read()
tlds = set(tld_content.splitlines(False)) tlds = set(tld_content.splitlines(False))
new_domains = set() new_domains = set()
for domain in domains: for domain in domains:
domain_parts = domain.split('.') domain_parts = domain.split('.')
last_root_domain = None last_root_domain = None
for i in xrange(0, len(domain_parts)): for i in range(0, len(domain_parts)):
root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:]) root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
if i == 0: if i == 0:
if not tlds.__contains__(root_domain): if not tlds.__contains__(root_domain):
@@ -185,7 +185,7 @@ def reduce_domains(domains):
uni_domains = set() uni_domains = set()
for domain in new_domains: for domain in new_domains:
domain_parts = domain.split('.') domain_parts = domain.split('.')
for i in xrange(0, len(domain_parts)-1): for i in range(0, len(domain_parts)-1):
root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:]) root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
if domains.__contains__(root_domain): if domains.__contains__(root_domain):
break break
@@ -196,7 +196,7 @@ def reduce_domains(domains):
def generate_pac_fast(domains, proxy, direct_domains, cnips, local_tlds): def generate_pac_fast(domains, proxy, direct_domains, cnips, local_tlds):
# render the pac file # render the pac file
with open('./pac-template', 'rb') as f: with open('./pac-template', 'r') as f:
proxy_content = f.read() proxy_content = f.read()
domains_dict = {} domains_dict = {}
for domain in domains: for domain in domains:
@@ -242,7 +242,7 @@ def generate_pac_precise(rules, proxy):
return None return None
# render the pac file # render the pac file
proxy_content = pkgutil.get_data('gfwlist2pac', './abp.js') proxy_content = pkgutil.get_data('gfwlist2pac', './abp.js')
rules = filter(grep_rule, rules) rules = list(filter(grep_rule, rules))
proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy))) proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy)))
proxy_content = proxy_content.replace('__RULES__', proxy_content = proxy_content.replace('__RULES__',
json.dumps(rules, indent=2)) json.dumps(rules, indent=2))
@@ -255,46 +255,46 @@ def main():
direct_rule = None direct_rule = None
localtld_rule = None localtld_rule = None
if (args.input): if (args.input):
with open(args.input, 'rb') as f: with open(args.input, 'r') as f:
content = f.read() content = f.read()
else: else:
print 'Downloading gfwlist from %s' % gfwlist_url print('Downloading gfwlist from %s' % gfwlist_url)
content = urllib2.urlopen(gfwlist_url, timeout=10).read() content = urllib.request.urlopen(gfwlist_url, timeout=10).read().decode('utf-8')
if args.user_rule: if args.user_rule:
userrule_parts = urlparse.urlsplit(args.user_rule) userrule_parts = urllib.parse.urlsplit(args.user_rule)
if not userrule_parts.scheme or not userrule_parts.netloc: if not userrule_parts.scheme or not userrule_parts.netloc:
# It's not an URL, deal it as local file # It's not an URL, deal it as local file
with open(args.user_rule, 'rb') as f: with open(args.user_rule, 'r') as f:
user_rule = f.read() user_rule = f.read()
else: else:
# Yeah, it's an URL, try to download it # Yeah, it's an URL, try to download it
print 'Downloading user rules file from %s' % args.user_rule print('Downloading user rules file from %s' % args.user_rule)
user_rule = urllib2.urlopen(args.user_rule, timeout=10).read() user_rule = urllib.request.urlopen(args.user_rule, timeout=10).read().decode('utf-8')
if args.direct_rule: if args.direct_rule:
directrule_parts = urlparse.urlsplit(args.direct_rule) directrule_parts = urllib.parse.urlsplit(args.direct_rule)
if not directrule_parts.scheme or not directrule_parts.netloc: if not directrule_parts.scheme or not directrule_parts.netloc:
# It's not an URL, deal it as local file # It's not an URL, deal it as local file
with open(args.direct_rule, 'rb') as f: with open(args.direct_rule, 'r') as f:
direct_rule = f.read() direct_rule = f.read()
else: else:
# Yeah, it's an URL, try to download it # Yeah, it's an URL, try to download it
print 'Downloading user rules file from %s' % args.user_rule print('Downloading user rules file from %s' % args.user_rule)
direct_rule = urllib2.urlopen(args.direct_rule, timeout=10).read() direct_rule = urllib.request.urlopen(args.direct_rule, timeout=10).read().decode('utf-8')
direct_rule = direct_rule.splitlines(False) direct_rule = direct_rule.splitlines(False)
else: else:
direct_rule = [] direct_rule = []
if args.localtld_rule: if args.localtld_rule:
tldrule_parts = urlparse.urlsplit(args.localtld_rule) tldrule_parts = urllib.parse.urlsplit(args.localtld_rule)
if not tldrule_parts.scheme or not tldrule_parts.netloc: if not tldrule_parts.scheme or not tldrule_parts.netloc:
# It's not an URL, deal it as local file # It's not an URL, deal it as local file
with open(args.localtld_rule, 'rb') as f: with open(args.localtld_rule, 'r') as f:
localtld_rule = f.read() localtld_rule = f.read()
else: else:
# Yeah, it's an URL, try to download it # Yeah, it's an URL, try to download it
print 'Downloading local tlds rules file from %s' % args.user_rule print('Downloading local tlds rules file from %s' % args.user_rule)
localtld_rule = urllib2.urlopen(args.localtld_rule, timeout=10).read() localtld_rule = urllib.request.urlopen(args.localtld_rule, timeout=10).read().decode('utf-8')
localtld_rule = localtld_rule.splitlines(False) localtld_rule = localtld_rule.splitlines(False)
else: else:
localtld_rule = [] localtld_rule = []
@@ -308,7 +308,7 @@ def main():
domains = reduce_domains(domains) domains = reduce_domains(domains)
pac_content = generate_pac_fast(domains, args.proxy, direct_rule, cnips, localtld_rule) pac_content = generate_pac_fast(domains, args.proxy, direct_rule, cnips, localtld_rule)
with open(args.output, 'wb') as f: with open(args.output, 'w') as f:
f.write(pac_content) f.write(pac_content)