Merge pull request #8 from grandpaul/paulliu-python3
Porting to Python 3 (contributed by Ying-Chun Liu (PaulLiu))
This commit is contained in:
56
gfw-pac.py
56
gfw-pac.py
@@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@@ -6,10 +6,10 @@ import math
|
|||||||
import socket
|
import socket
|
||||||
import struct
|
import struct
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import urlparse
|
import urllib.parse
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import urllib2
|
import urllib.request, urllib.error, urllib.parse
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
gfwlist_url = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
|
gfwlist_url = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
|
||||||
@@ -45,14 +45,14 @@ def ip2long(ip):
|
|||||||
def fetch_ip_data():
|
def fetch_ip_data():
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
if (args.ip_file):
|
if (args.ip_file):
|
||||||
with open(args.ip_file, 'rb') as f:
|
with open(args.ip_file, 'r') as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
else:
|
else:
|
||||||
#fetch data from apnic
|
#fetch data from apnic
|
||||||
print "Fetching data from apnic.net, it might take a few minutes, please wait..."
|
print("Fetching data from apnic.net, it might take a few minutes, please wait...")
|
||||||
url=r'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest'
|
url=r'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest'
|
||||||
# url=r'http://flora/delegated-apnic-latest' #debug
|
# url=r'http://flora/delegated-apnic-latest' #debug
|
||||||
data=urllib2.urlopen(url).read()
|
data=urllib.request.urlopen(url).read().decode('utf-8')
|
||||||
|
|
||||||
cnregex=re.compile(r'apnic\|cn\|ipv4\|[0-9\.]+\|[0-9]+\|[0-9]+\|a.*',re.IGNORECASE)
|
cnregex=re.compile(r'apnic\|cn\|ipv4\|[0-9\.]+\|[0-9]+\|[0-9]+\|a.*',re.IGNORECASE)
|
||||||
cndata=cnregex.findall(data)
|
cndata=cnregex.findall(data)
|
||||||
@@ -114,7 +114,7 @@ def get_hostname(something):
|
|||||||
# quite enough for GFW
|
# quite enough for GFW
|
||||||
if not something.startswith('http:'):
|
if not something.startswith('http:'):
|
||||||
something = 'http://' + something
|
something = 'http://' + something
|
||||||
r = urlparse.urlparse(something)
|
r = urllib.parse.urlparse(something)
|
||||||
return r.hostname
|
return r.hostname
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(e)
|
logging.error(e)
|
||||||
@@ -161,14 +161,14 @@ def parse_gfwlist(gfwlist):
|
|||||||
def reduce_domains(domains):
|
def reduce_domains(domains):
|
||||||
# reduce 'www.google.com' to 'google.com'
|
# reduce 'www.google.com' to 'google.com'
|
||||||
# remove invalid domains
|
# remove invalid domains
|
||||||
with open('./tld.txt', 'rb') as f:
|
with open('./tld.txt', 'r') as f:
|
||||||
tld_content = f.read()
|
tld_content = f.read()
|
||||||
tlds = set(tld_content.splitlines(False))
|
tlds = set(tld_content.splitlines(False))
|
||||||
new_domains = set()
|
new_domains = set()
|
||||||
for domain in domains:
|
for domain in domains:
|
||||||
domain_parts = domain.split('.')
|
domain_parts = domain.split('.')
|
||||||
last_root_domain = None
|
last_root_domain = None
|
||||||
for i in xrange(0, len(domain_parts)):
|
for i in range(0, len(domain_parts)):
|
||||||
root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
|
root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
|
||||||
if i == 0:
|
if i == 0:
|
||||||
if not tlds.__contains__(root_domain):
|
if not tlds.__contains__(root_domain):
|
||||||
@@ -185,7 +185,7 @@ def reduce_domains(domains):
|
|||||||
uni_domains = set()
|
uni_domains = set()
|
||||||
for domain in new_domains:
|
for domain in new_domains:
|
||||||
domain_parts = domain.split('.')
|
domain_parts = domain.split('.')
|
||||||
for i in xrange(0, len(domain_parts)-1):
|
for i in range(0, len(domain_parts)-1):
|
||||||
root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
|
root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
|
||||||
if domains.__contains__(root_domain):
|
if domains.__contains__(root_domain):
|
||||||
break
|
break
|
||||||
@@ -196,7 +196,7 @@ def reduce_domains(domains):
|
|||||||
|
|
||||||
def generate_pac_fast(domains, proxy, direct_domains, cnips, local_tlds):
|
def generate_pac_fast(domains, proxy, direct_domains, cnips, local_tlds):
|
||||||
# render the pac file
|
# render the pac file
|
||||||
with open('./pac-template', 'rb') as f:
|
with open('./pac-template', 'r') as f:
|
||||||
proxy_content = f.read()
|
proxy_content = f.read()
|
||||||
domains_dict = {}
|
domains_dict = {}
|
||||||
for domain in domains:
|
for domain in domains:
|
||||||
@@ -242,7 +242,7 @@ def generate_pac_precise(rules, proxy):
|
|||||||
return None
|
return None
|
||||||
# render the pac file
|
# render the pac file
|
||||||
proxy_content = pkgutil.get_data('gfwlist2pac', './abp.js')
|
proxy_content = pkgutil.get_data('gfwlist2pac', './abp.js')
|
||||||
rules = filter(grep_rule, rules)
|
rules = list(filter(grep_rule, rules))
|
||||||
proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy)))
|
proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy)))
|
||||||
proxy_content = proxy_content.replace('__RULES__',
|
proxy_content = proxy_content.replace('__RULES__',
|
||||||
json.dumps(rules, indent=2))
|
json.dumps(rules, indent=2))
|
||||||
@@ -255,46 +255,46 @@ def main():
|
|||||||
direct_rule = None
|
direct_rule = None
|
||||||
localtld_rule = None
|
localtld_rule = None
|
||||||
if (args.input):
|
if (args.input):
|
||||||
with open(args.input, 'rb') as f:
|
with open(args.input, 'r') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
else:
|
else:
|
||||||
print 'Downloading gfwlist from %s' % gfwlist_url
|
print('Downloading gfwlist from %s' % gfwlist_url)
|
||||||
content = urllib2.urlopen(gfwlist_url, timeout=10).read()
|
content = urllib.request.urlopen(gfwlist_url, timeout=10).read().decode('utf-8')
|
||||||
if args.user_rule:
|
if args.user_rule:
|
||||||
userrule_parts = urlparse.urlsplit(args.user_rule)
|
userrule_parts = urllib.parse.urlsplit(args.user_rule)
|
||||||
if not userrule_parts.scheme or not userrule_parts.netloc:
|
if not userrule_parts.scheme or not userrule_parts.netloc:
|
||||||
# It's not an URL, deal it as local file
|
# It's not an URL, deal it as local file
|
||||||
with open(args.user_rule, 'rb') as f:
|
with open(args.user_rule, 'r') as f:
|
||||||
user_rule = f.read()
|
user_rule = f.read()
|
||||||
else:
|
else:
|
||||||
# Yeah, it's an URL, try to download it
|
# Yeah, it's an URL, try to download it
|
||||||
print 'Downloading user rules file from %s' % args.user_rule
|
print('Downloading user rules file from %s' % args.user_rule)
|
||||||
user_rule = urllib2.urlopen(args.user_rule, timeout=10).read()
|
user_rule = urllib.request.urlopen(args.user_rule, timeout=10).read().decode('utf-8')
|
||||||
|
|
||||||
if args.direct_rule:
|
if args.direct_rule:
|
||||||
directrule_parts = urlparse.urlsplit(args.direct_rule)
|
directrule_parts = urllib.parse.urlsplit(args.direct_rule)
|
||||||
if not directrule_parts.scheme or not directrule_parts.netloc:
|
if not directrule_parts.scheme or not directrule_parts.netloc:
|
||||||
# It's not an URL, deal it as local file
|
# It's not an URL, deal it as local file
|
||||||
with open(args.direct_rule, 'rb') as f:
|
with open(args.direct_rule, 'r') as f:
|
||||||
direct_rule = f.read()
|
direct_rule = f.read()
|
||||||
else:
|
else:
|
||||||
# Yeah, it's an URL, try to download it
|
# Yeah, it's an URL, try to download it
|
||||||
print 'Downloading user rules file from %s' % args.user_rule
|
print('Downloading user rules file from %s' % args.user_rule)
|
||||||
direct_rule = urllib2.urlopen(args.direct_rule, timeout=10).read()
|
direct_rule = urllib.request.urlopen(args.direct_rule, timeout=10).read().decode('utf-8')
|
||||||
direct_rule = direct_rule.splitlines(False)
|
direct_rule = direct_rule.splitlines(False)
|
||||||
else:
|
else:
|
||||||
direct_rule = []
|
direct_rule = []
|
||||||
|
|
||||||
if args.localtld_rule:
|
if args.localtld_rule:
|
||||||
tldrule_parts = urlparse.urlsplit(args.localtld_rule)
|
tldrule_parts = urllib.parse.urlsplit(args.localtld_rule)
|
||||||
if not tldrule_parts.scheme or not tldrule_parts.netloc:
|
if not tldrule_parts.scheme or not tldrule_parts.netloc:
|
||||||
# It's not an URL, deal it as local file
|
# It's not an URL, deal it as local file
|
||||||
with open(args.localtld_rule, 'rb') as f:
|
with open(args.localtld_rule, 'r') as f:
|
||||||
localtld_rule = f.read()
|
localtld_rule = f.read()
|
||||||
else:
|
else:
|
||||||
# Yeah, it's an URL, try to download it
|
# Yeah, it's an URL, try to download it
|
||||||
print 'Downloading local tlds rules file from %s' % args.user_rule
|
print('Downloading local tlds rules file from %s' % args.user_rule)
|
||||||
localtld_rule = urllib2.urlopen(args.localtld_rule, timeout=10).read()
|
localtld_rule = urllib.request.urlopen(args.localtld_rule, timeout=10).read().decode('utf-8')
|
||||||
localtld_rule = localtld_rule.splitlines(False)
|
localtld_rule = localtld_rule.splitlines(False)
|
||||||
else:
|
else:
|
||||||
localtld_rule = []
|
localtld_rule = []
|
||||||
@@ -308,7 +308,7 @@ def main():
|
|||||||
domains = reduce_domains(domains)
|
domains = reduce_domains(domains)
|
||||||
pac_content = generate_pac_fast(domains, args.proxy, direct_rule, cnips, localtld_rule)
|
pac_content = generate_pac_fast(domains, args.proxy, direct_rule, cnips, localtld_rule)
|
||||||
|
|
||||||
with open(args.output, 'wb') as f:
|
with open(args.output, 'w') as f:
|
||||||
f.write(pac_content)
|
f.write(pac_content)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user