initial commit.

This commit is contained in:
zhiyi
2015-04-13 17:27:44 +08:00
commit ccb5ed49b5
10 changed files with 59930 additions and 0 deletions

45
.gitignore vendored Normal file
View File

@@ -0,0 +1,45 @@
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk

41
README.md Normal file
View File

@@ -0,0 +1,41 @@
# gfw-pac
通过 gfwlist 和中国 IP 地址生成 PAC(Proxy auto-config) 文件。对存在于 gfwlist 的域名和解析出的 IP 在国外的域名使用代理。
基于 [GFWList2PAC](https://github.com/clowwindy/gfwlist2pac) 和 [Flora PAC](https://github.com/Leask/Flora_Pac)
## 特性
* 速度快,优先按域名匹配,再按 IP 匹配
* 可自定义需要代理的域名
* 可自定义不需要代理的域名
* 如果访问的域名不在列表里,但是 IP 在国外,也返回代理服务器
## 用法
直接使用 `gfw.pac`,或者手工运行 `gfw-pac.py` 生成自己的 pac 文件。
## gfw-pac.py 使用说明
usage: gfw-pac.py [-h] [-i GFWLIST] -f PAC -p PROXY [--user-rule USER_RULE]
[--direct-rule DIRECT_RULE] [--ip-file IP_FILE]
参数说明:
-h 显示帮助
-i 指定本地 gfwlist 文件,若不指定则自动从 googlecode 下载
-f (必须)输出的 pac 文件
-p (必须)指定代理服务器
--user-rule 自定义使用代理的域名文件,文件里每行一个域名
--direct-rule 自定义不使用代理的域名文件,文件里每行一个域名
--ip-file 指定本地的从 apnic 下载的 IP 分配文件。若不指定则自动从 apnic 下载
举例:
./gfw-pac.py -i gfwlist.txt \
-f gfw.pac \
-p "PROXY 192.168.1.200:3128; DIRECT" \
--user-rule=custom-domains.txt \
--direct-rule=direct-domains.txt \
--ip-file=delegated-apnic-latest.txt
[一路凯歌 技术博客](http://zhiyi.us)

55
custom-domains.txt Normal file
View File

@@ -0,0 +1,55 @@
google.com
google.com.hk
ggpht.com
googleapis.com
google-analytics.com
googleadservices.com
googlesyndication.com
googleadsserving.cn
doubleclick.net
wikipedia.org
stackoverflow.com
sstatic.net
imgur.com
github.com
githubusercontent.com
github.io
googlecode.com
docker.io
docker.com
angularjs.org
rsyslog.com
gerhards.net
chrome.com
digicert.com
typekit.net
wordpress.com
aolcdn.com
wp.com
ifixit.com
atlassian.com
amazonaws.com
cloudfront.net
trello.com
appelsiini.net
angularjs.org
mplxtms.com
slack.com
fastly.net
edgekey.net
discussions.apple.com
disquscdn.com
wenzhixin.net.cn
last.fm
scdn.co
spotify.com
spotilocal.com
music.126.net
laravel.com
youbma.com
youb99.com
youb88.com
youb77.com
youb66.com
youb11.com
qiporn.com

38084
delegated-apnic-latest.txt Normal file
View File

File diff suppressed because it is too large Load Diff

368
direct-domains.txt Normal file
View File

@@ -0,0 +1,368 @@
10010.com
115.com
123u.com
126.com
126.net
163.com
17173.com
178.com
17cdn.com
21cn.com
2288.org
3322.org
360buy.com
360buyimg.com
360doc.com
360safe.com
36kr.com
400gb.com
4399.com
51.la
51buy.com
51cto.com
51job.com
51jobcdn.com
5d6d.com
5d6d.net
61.com
6600.org
6rooms.com
7766.org
7k7k.com
8800.org
8866.org
90g.org
91.com
9966.org
acfun.tv
aicdn.com
ali213.net
alibaba.com
alicdn.com
aliexpress.com
aliimg.com
alikunlun.com
alimama.com
alipay.com
alipayobjects.com
alisoft.com
aliyun.com
aliyuncdn.com
aliyuncs.com
anzhi.com
appinn.com
apple.com
appsina.com
archlinuxcn.org
atpanel.com
baidu.com
baidupcs.com
baidustatic.com
baifendian.com
baihe.com
baixing.com
bdimg.com
bdstatic.com
bilibili.tv
blogbus.com
blueidea.com
ccb.com
cctv.com
cctvpic.com
cdn20.com
china.com
chinabyte.com
chinacache.com
chinacache.net
chinacaipu.com
chinagba.com
chinahr.com
chinajoy.net
chinamobile.com
chinanetcenter.com
chinanews.com
chinapnr.com
chinaren.com
chinaspeeds.net
chinaunix.net
chinaz.com
chint.com
chiphell.com
chuangxin.com
ci123.com
ciku5.com
citysbs.com
class.coursera.org
cloudcdn.net
cmbchina.com
cmfu.com
cmread.com
cmwb.com
cn.archive.ubuntu.com
cn.bing.com
cn.coremetrics.com
cn.debian.org
cn.msn.com
cn
cnak2.englishtown.com
cnbeta.com
cnbetacdn.com
cnblogs.com
cnepub.com
cnzz.com
comsenz.com
csdn.net
ct10000.com
ctdisk.com
dangdang.com
dbank.com
dedecms.com
diandian.com
dianping.com
discuz.com
discuz.net
dl.google.com
docin.com
donews.com
dospy.com
douban.com
douban.fm
duapp.com
duba.net
duomi.com
duote.com
duowan.com
egou.com
et8.org
etao.com
f3322.org
fantong.com
fenzhi.com
fhldns.com
ganji.com
gaopeng.com
geekpark.net
gfan.com
gtimg.com
hacdn.net
hadns.net
hao123.com
hao123img.com
hc360.com
hdslb.com
hexun.com
hiapk.com
hichina.com
hoopchina.com
huanqiu.com
hudong.com
huochepiao.com
hupu.com
iask.com
iciba.com
idqqimg.com
ifanr.com
ifeng.com
ifengimg.com
ijinshan.com
iqiyi.com
it168.com
itcpn.net
iteye.com
itouzi.com
jandan.net
jd.com
jiashule.com
jiasule.com
jiathis.com
jiayuan.com
jiepang.com
jing.fm
jobbole.com
jstv.com
jumei.com
kaixin001.com
kandian.com
kandian.net
kanimg.com
kankanews.com
kdnet.net
koudai8.com
ku6.com
ku6cdn.com
ku6img.com
kuaidi100.com
kugou.com
lashou.com
letao.com
letv.com
lietou.com
linezing.com
loli.mg
loli.vg
lvping.com
lxdns.com
mangocity.com
mapbar.com
mcbbs.net
mediav.com
meilishuo.com
meituan.com
meituan.net
meizu.com
microsoft.com
miui.com
moe123.com
moegirl.org
mop.com
mtime.com
my-card.in
mydrivers.com
mzstatic.com
netease.com
newsmth.net
ngacn.cc
nuomi.com
okbuy.com
optaim.com
oschina.net
paipai.com
pcbeta.com
pchome.net
pcpop.com
pengyou.com
phoenixlzx.com
phpwind.net
pingan.com
pool.ntp.org
pplive.com
pps.tv
ppstream.com
pptv.com
pubyun.com
qhimg.com
qianlong.com
qidian.com
qingdaonews.com
qiniu.com
qiniudn.com
qiushibaike.com
qiyi.com
qiyipic.com
qq.com
qqmail.com
qstatic.com
qunar.com
qunarzz.com
qvbuy.com
renren.com
renrendai.com
rrfmn.com
rrimg.com
sanguosha.com
sdo.com
sina.com
sinaapp.com
sinaedge.com
sinaimg.com
sinajs.com
skycn.com
smzdm.com
sogou.com
sohu.com
soku.com
solidot.org
soso.com
soufun.com
soufunimg.com
staticfile.org
staticsdo.com
steamcn.com
suning.com
szzfgjj.com
tanx.com
taobao.com
taobaocdn.com
tbcache.com
tdimg.com
tencent.com
tenpay.com
tgbus.com
thawte.com
tiancity.com
tianyaui.com
tiexue.net
tmall.com
tmcdn.net
tom.com
tomonline-inc.com
tuan800.com
tuan800.net
tuanimg.com
tudou.com
tudouui.com
tuniu.com
u148.net
u17.com
ubuntu.com
ucjoy.com
uni-marketers.com
unionpay.com
unionpaysecure.com
upaiyun.com
upyun.com
uusee.com
uuu9.com
vaikan.com
vancl.com
vcimg.com
verycd.com
wandoujia.com
wdjimg.com
weibo.com
weiphone.com
weiyun.com
west263.com
wrating.com
wscdns.com
wumii.com
xdcdn.net
xiachufang.com
xiami.com
xiami.net
xiaomi.com
xiaonei.com
xiazaiba.com
xici.net
xilu.com
xinhuanet.com
xinnet.com
xlpan.com
xn--fiqs8s
xnpic.com
xungou.com
xunlei.com
ydstatic.com
yesky.com
yeyou.com
yihaodian.com
yihaodianimg.com
yingjiesheng.com
yintai.com
yinyuetai.com
yiqifa.com
yixun.com
ykimg.com
ynet.com
youdao.com
yougou.com
youku.com
yupoo.com
yy.com
zbjimg.com
zhaopin.com
zhi.hu
zhihu.com
zhimg.com
zhubajie.com
zongheng.com

289
gfw-pac.py Executable file
View File

@@ -0,0 +1,289 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import math
import socket
import struct
import pkgutil
import urlparse
import json
import logging
import urllib2
from argparse import ArgumentParser
# Upstream gfwlist location.  The original Google Code URL
# (autoproxy-gfwlist.googlecode.com) is dead -- Google Code was shut
# down -- so fetch from the maintained GitHub mirror instead.
gfwlist_url = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
def parse_args():
    """Define and parse the command-line interface.

    Required: -f/--file (output PAC path) and -p/--proxy (proxy string
    embedded in the PAC).  Optional: -i/--input (local gfwlist),
    --user-rule, --direct-rule and --ip-file (local APNIC data).
    """
    ap = ArgumentParser()
    ap.add_argument('-i', '--input', dest='input',
                    help='path to gfwlist', metavar='GFWLIST')
    ap.add_argument('-f', '--file', dest='output', required=True,
                    help='path to output pac', metavar='PAC')
    ap.add_argument('-p', '--proxy', dest='proxy', required=True,
                    help='the proxy parameter in the pac file, '
                         'for example, "SOCKS5 127.0.0.1:1080;"',
                    metavar='PROXY')
    ap.add_argument('--user-rule', dest='user_rule',
                    help='user rule file, which will be appended to'
                         ' gfwlist')
    ap.add_argument('--direct-rule', dest='direct_rule',
                    help='user rule file, contains domains not bypass proxy')
    ap.add_argument('--ip-file', dest='ip_file',
                    help='delegated-apnic-latest from apnic.net')
    return ap.parse_args()
#from https://github.com/Leask/Flora_Pac
def ip2long(ip):
packedIP = socket.inet_aton(ip)
return struct.unpack("!L", packedIP)[0]
#from https://github.com/Leask/Flora_Pac
def fetch_ip_data():
args = parse_args()
if (args.ip_file):
with open(args.ip_file, 'rb') as f:
data = f.read()
else:
#fetch data from apnic
print "Fetching data from apnic.net, it might take a few minutes, please wait..."
url=r'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest'
# url=r'http://flora/delegated-apnic-latest' #debug
data=urllib2.urlopen(url).read()
cnregex=re.compile(r'apnic\|cn\|ipv4\|[0-9\.]+\|[0-9]+\|[0-9]+\|a.*',re.IGNORECASE)
cndata=cnregex.findall(data)
results=[]
prev_net=''
for item in cndata:
unit_items=item.split('|')
starting_ip=unit_items[3]
num_ip=int(unit_items[4])
imask=0xffffffff^(num_ip-1)
#convert to string
imask=hex(imask)[2:]
mask=[0]*4
mask[0]=imask[0:2]
mask[1]=imask[2:4]
mask[2]='0' #imask[4:6]
mask[3]='0' #imask[6:8]
#convert str to int
mask=[ int(i,16 ) for i in mask]
mask="%d.%d.%d.%d"%tuple(mask)
#mask in *nix format
mask2=32-int(math.log(num_ip,2))
ip=starting_ip.split('.')
ip[2] = '0'
ip[3] = '0'
starting_ip = '.'.join(ip)
if starting_ip != prev_net:
results.append((ip2long(starting_ip), ip2long(mask), mask2))
prev_net = starting_ip
results.insert(0, (ip2long('127.0.0.1'), ip2long('255.0.0.0'), 0))
results.insert(1, (ip2long('10.0.0.0'), ip2long('255.0.0.0'), 0))
results.insert(2, (ip2long('172.16.0.0'), ip2long('255.240.0.0'), 0))
results.insert(3, (ip2long('192.168.0.0'), ip2long('255.255.0.0'), 0))
def ip(item):
return item[0]
results = sorted(results, key = ip)
return results
def decode_gfwlist(content):
    """Return *content* base64-decoded when it looks encoded.

    A raw gfwlist download is base64; any '.' in the text means it is
    already plain (real rules contain dotted domains), so it is returned
    untouched.  Decoding failures also fall back to the raw text.
    """
    if '.' in content:
        return content
    try:
        return content.decode('base64')
    except:
        return content
def get_hostname(something):
    """Extract the hostname from a URL or bare domain string.

    Bare domains get an 'http://' prefix so urlparse can see the netloc.
    Returns None (and logs) when parsing fails.
    """
    try:
        # quite enough for GFW
        url = something if something.startswith('http:') else 'http://' + something
        return urlparse.urlparse(url).hostname
    except Exception as e:
        logging.error(e)
        return None
def add_domain_to_set(s, something):
    """Parse *something* as a URL/domain and add its hostname to set *s*.

    Entries whose hostname cannot be extracted are silently skipped.
    """
    host = get_hostname(something)
    if host is not None:
        s.add(host)
def combine_lists(content, user_rule=None):
    """Split gfwlist *content* into lines, appending user-rule lines.

    Both arguments are raw text; *user_rule* may be None/empty, in which
    case only the gfwlist lines are returned.
    """
    lines = content.splitlines(False)
    if user_rule:
        lines += user_rule.splitlines(False)
    return lines
def parse_gfwlist(gfwlist):
    """Collect candidate domains from AdBlock-Plus style gfwlist rules.

    Rules containing '.*' are dropped; other '*' wildcards become '/';
    '||', '|' and leading-'.' anchors are stripped; comments ('!'),
    section headers ('[') and whitelist entries ('@@') are skipped.
    Returns a set of hostnames.
    """
    domains = set()
    for rule in gfwlist:
        if '.*' in rule:
            continue
        if '*' in rule:
            rule = rule.replace('*', '/')
        if rule.startswith('||'):
            rule = rule.lstrip('||')
        elif rule.startswith('|'):
            rule = rule.lstrip('|')
        elif rule.startswith('.'):
            rule = rule.lstrip('.')
        # ignore comments, section headers and the white list
        if rule.startswith('!') or rule.startswith('[') or rule.startswith('@'):
            continue
        add_domain_to_set(domains, rule)
    return domains
def reduce_domains(domains):
    # reduce 'www.google.com' to 'google.com'
    # remove invalid domains
    """Normalize a domain set against the public-suffix list in ./tld.txt.

    Each domain is shrunk to the shortest suffix that is NOT itself a
    public suffix (the registrable domain); entries whose TLD is unknown
    are dropped, and domains already covered by a shorter entry in the
    input set are removed.
    """
    with open('./tld.txt', 'rb') as f:
        tld_content = f.read()
    tlds = set(tld_content.splitlines(False))
    new_domains = set()
    for domain in domains:
        domain_parts = domain.split('.')
        last_root_domain = None
        # Grow the suffix one label at a time while it is still a public
        # suffix; the first step past that is the registrable domain.
        for i in xrange(0, len(domain_parts)):
            root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
            if i == 0:
                if not tlds.__contains__(root_domain):
                    # root_domain is not a valid tld
                    break
            last_root_domain = root_domain
            if tlds.__contains__(root_domain):
                continue
            else:
                break
        if last_root_domain is not None:
            new_domains.add(last_root_domain)
    uni_domains = set()
    for domain in new_domains:
        domain_parts = domain.split('.')
        # Drop a domain when one of its proper suffixes is already in the
        # ORIGINAL set -- the shorter entry covers it.  The for/else adds
        # the domain only when no such suffix was found.
        for i in xrange(0, len(domain_parts)-1):
            root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
            if domains.__contains__(root_domain):
                break
        else:
            uni_domains.add(domain)
    return uni_domains
def generate_pac_fast(domains, proxy, direct_domains, cnips):
    """Render the PAC file by filling the placeholders in ./pac-template.

    *domains* / *direct_domains* become {hostname: 1} lookup dicts for
    O(1) membership tests in the PAC; *cnips* is the sorted
    (network, mask, prefix) list used by the PAC's binary search.
    Returns the rendered template text.
    """
    with open('./pac-template', 'rb') as f:
        content = f.read()
    # Substitutions applied in a fixed order (same as the placeholders
    # appear conceptually: proxy, domains, direct domains, CN IPs).
    substitutions = [
        ('__PROXY__', json.dumps(str(proxy))),
        ('__DOMAINS__',
         json.dumps(dict((d, 1) for d in domains),
                    indent=2, sort_keys=True)),
        ('__DIRECT_DOMAINS__',
         json.dumps(dict((d, 1) for d in direct_domains),
                    indent=2, sort_keys=True)),
        ('__CN_IPS__', json.dumps(cnips, indent=2, sort_keys=False)),
    ]
    for placeholder, value in substitutions:
        content = content.replace(placeholder, value)
    return content
def generate_pac_precise(rules, proxy):
    """Render a 'precise' PAC embedding the raw ABP rules.

    NOTE(review): this function is never called from main() and loads
    './abp.js' via pkgutil from a 'gfwlist2pac' package that this
    repository does not ship -- it appears to be dead code inherited
    from the gfwlist2pac project; confirm before relying on it.
    """
    def grep_rule(rule):
        # Keep a rule unless it is empty, a comment ('!') or a section
        # header ('[').
        if rule:
            if rule.startswith('!'):
                return None
            if rule.startswith('['):
                return None
            return rule
        return None
    # render the pac file
    proxy_content = pkgutil.get_data('gfwlist2pac', './abp.js')
    rules = filter(grep_rule, rules)
    proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy)))
    proxy_content = proxy_content.replace('__RULES__',
                                          json.dumps(rules, indent=2))
    return proxy_content
def main():
args = parse_args()
user_rule = None
direct_rule = None
if (args.input):
with open(args.input, 'rb') as f:
content = f.read()
else:
print 'Downloading gfwlist from %s' % gfwlist_url
content = urllib2.urlopen(gfwlist_url, timeout=10).read()
if args.user_rule:
userrule_parts = urlparse.urlsplit(args.user_rule)
if not userrule_parts.scheme or not userrule_parts.netloc:
# It's not an URL, deal it as local file
with open(args.user_rule, 'rb') as f:
user_rule = f.read()
else:
# Yeah, it's an URL, try to download it
print 'Downloading user rules file from %s' % args.user_rule
user_rule = urllib2.urlopen(args.user_rule, timeout=10).read()
if args.direct_rule:
directrule_parts = urlparse.urlsplit(args.direct_rule)
if not directrule_parts.scheme or not directrule_parts.netloc:
# It's not an URL, deal it as local file
with open(args.direct_rule, 'rb') as f:
direct_rule = f.read()
else:
# Yeah, it's an URL, try to download it
print 'Downloading user rules file from %s' % args.user_rule
direct_rule = urllib2.urlopen(args.direct_rule, timeout=10).read()
direct_rule = direct_rule.splitlines(False)
cnips = fetch_ip_data()
content = decode_gfwlist(content)
gfwlist = combine_lists(content, user_rule)
domains = parse_gfwlist(gfwlist)
domains = reduce_domains(domains)
pac_content = generate_pac_fast(domains, args.proxy, direct_rule, cnips)
with open(args.output, 'wb') as f:
f.write(pac_content)
if __name__ == '__main__':
main()

12190
gfw.pac Normal file
View File

File diff suppressed because it is too large Load Diff

2244
gfwlist.txt Normal file
View File

File diff suppressed because it is too large Load Diff

91
pac-template Normal file
View File

@@ -0,0 +1,91 @@
// Placeholders below are filled in by gfw-pac.py when rendering
// this template; the file is not valid JavaScript until then.
var cnips = __CN_IPS__;                  // sorted [network, mask, prefix] triples for CN ranges
var directDomains = __DIRECT_DOMAINS__;  // {hostname: 1} -- always go direct
var domains = __DOMAINS__;               // {hostname: 1} -- always use the proxy
var proxy = __PROXY__;                   // proxy string, e.g. "PROXY 1.2.3.4:8080; DIRECT"
var direct = 'DIRECT;';
var hasOwnProperty = Object.hasOwnProperty;
function convertAddress(ipchars) {
  // Fold the four dotted-quad octets into one SIGNED 32-bit integer
  // (most-significant octet first); masks in cnips use the same
  // signed representation, so comparisons stay consistent.
  var octets = ipchars.split('.');
  return ((octets[0] & 0xff) << 24) |
         ((octets[1] & 0xff) << 16) |
         ((octets[2] & 0xff) << 8) |
         (octets[3] & 0xff);
}
// Binary search the sorted cnips table for a network containing `ip`.
// cnips[i] = [network, mask, prefixLen], sorted by network address.
// `>>> 0` converts the signed bitwise results to unsigned so the
// ordering comparison below is correct.
function match(ip) {
  var left = 0, right = cnips.length;
  do {
    var mid = Math.floor((left + right) / 2),
        ipf = (ip & cnips[mid][1]) >>> 0,           // ip masked to candidate's prefix
        m = (cnips[mid][0] & cnips[mid][1]) >>> 0;  // candidate network, unsigned
    if (ipf == m) {
      return true;
    } else if (ipf > m) {
      left = mid + 1;
    } else {
      right = mid;
    }
  } while (left + 1 <= right)
  // NOTE(review): the do/while body runs once even if cnips were empty;
  // the generated table always has entries, so this is unreachable in
  // practice.
  return false;
}
// Report whether `target` or any of its multi-label suffixes is a key
// of `domains` ({hostname: 1}).  With `cnRootIncluded` set, every host
// under the 'cn' TLD matches immediately.
function testDomain(target, domains, cnRootIncluded) {
  var lastDot = target.lastIndexOf('.');
  var prevDot = target.lastIndexOf('.', lastDot - 1);
  if (cnRootIncluded && target.substring(lastDot + 1) === 'cn') {
    return true;
  }
  for (;;) {
    if (prevDot === -1) {
      // No shorter suffix left: the whole host is the last candidate.
      return Object.hasOwnProperty.call(domains, target);
    }
    if (Object.hasOwnProperty.call(domains, target.substring(prevDot + 1))) {
      return true;
    }
    prevDot = target.lastIndexOf('.', prevDot - 1);
  }
}
// PAC entry point: decide the proxy string for `host`.
// Order: plain/local hosts -> direct-domain list (with implicit *.cn)
// -> proxy-domain list -> fall back to checking the resolved address
// against the CN IP table.  isPlainHostName/dnsResolve are PAC
// builtins supplied by the browser.
function FindProxyForURL(url, host) {
  if (isPlainHostName(host)
      || host === '127.0.0.1'
      || host === 'localhost') {
    return direct;
  }
  if (testDomain(host, directDomains, true)) {
    return direct
  }
  if (testDomain(host, domains)) {
    return proxy;
  }
  var strIp = dnsResolve(host);
  if (!strIp) {
    // Resolution failed (possibly blocked/poisoned DNS): use the proxy.
    return proxy;
  }
  var intIp = convertAddress(strIp);
  if (match(intIp)) {
    return direct;
  }
  return proxy;
}

6523
tld.txt Normal file
View File

File diff suppressed because it is too large Load Diff