164 lines
5.0 KiB
Python
Executable File
164 lines
5.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import common_lib as mylib
|
|
# import hashlib
|
|
|
|
known_trackers = None


def is_tracker(domain):
    """Check a domain against the combined tracker list.

    The list is read from 'tracker_all.txt' via ``mylib.read_list`` the
    first time it is needed (and again on every call for as long as the
    loaded list is empty) and kept in the module-level ``known_trackers``.

    The domain is reversed before the lookup because the combined list
    stores its entries reversed.

    Returns whatever ``mylib.bintree_lookup`` returns for the reversed
    domain.
    """
    global known_trackers
    trackers = known_trackers or mylib.read_list('tracker_all.txt')
    known_trackers = trackers
    reversed_domain = domain[::-1]
    return mylib.bintree_lookup(trackers, reversed_domain)
|
|
|
|
|
|
# def md5(fname):
|
|
# hash_md5 = hashlib.md5()
|
|
# with open(fname, 'rb') as f:
|
|
# for chunk in iter(lambda: f.read(4096), b''):
|
|
# hash_md5.update(chunk)
|
|
# return hash_md5.hexdigest()
|
|
|
|
|
|
def save_list(result_set, fname, binary=False):
    """Write a collection of domains to ``tracker_<fname>`` and report changes.

    The sorted domains are written to a temporary ``<target>_tmp`` file,
    compared against the current target with ``mylib.diff_files`` and then
    moved over the target with ``mylib.mv``.

    Args:
        result_set: Collection of domains; ``bytes`` items when ``binary``
            is true, ``str`` items otherwise.
        fname: File name suffix. The target path is resolved with
            ``mylib.path_root('src', 'lists', 'tracker_' + fname)``.
        binary: Open the output in binary mode and expect ``bytes`` domains.

    Returns:
        The result of ``mylib.diff_files``; if the comparison raises, a
        sorted list of all domains as ``str``. An empty list is returned,
        and nothing is written, when ``result_set`` is empty.
    """
    if not result_set:
        # Nothing collected (e.g. the download failed): leave the existing
        # file untouched.
        return []

    out = mylib.path_root('src', 'lists', 'tracker_' + fname)
    tmp = out + '_tmp'
    domains = sorted(result_set)
    end = b'\n' if binary else '\n'
    with open(tmp, 'wb' if binary else 'w') as fp:
        for domain in domains:
            fp.write(domain + end)

    try:
        changes = mylib.diff_files(out, tmp)
    except Exception:
        # Comparison not possible (presumably there is no previous file yet
        # -- TODO confirm): report every domain as changed.
        # bytes must be decoded; str(b'x') would yield the text "b'x'".
        if binary:
            changes = [x.decode('utf-8', 'replace') for x in domains]
        else:
            changes = list(domains)
    mylib.mv(tmp, out)

    if changes:
        print(' updating: ' + fname)
    else:
        print(' no-change: ' + fname)
    return changes
|
|
|
|
|
|
def enum_lines(url, ignore=None):
    """Download ``url`` and yield its non-empty lines as ``bytes``.

    Args:
        url: Address passed to ``mylib.download``.
        ignore: Optional bytes prefix; lines starting with it are skipped.

    Any exception raised while downloading or splitting is reported through
    ``mylib.err`` under 'tracker-download' and ends the generator.
    """
    try:
        payload = mylib.download(url)
        for row in payload.split(b'\n'):
            if not row:
                continue  # skip blank lines
            if ignore and row.startswith(ignore):
                continue  # skip lines carrying the ignore prefix
            yield row
    except Exception as err:
        mylib.err('tracker-download', str(err) + ' in ' + url)
|
|
|
|
|
|
def github(path):
    """Return the raw.githubusercontent.com URL for the given path."""
    base = 'https://raw.githubusercontent.com/'
    return base + path
|
|
|
|
|
|
def lockdown(fname, urlname):
    """Fetch one list file from the lockdown-ios repository and save it.

    Args:
        fname: Target name handed to ``save_list``.
        urlname: File name inside the repository's ``LockdowniOS/`` folder.

    Returns the result of ``save_list``.
    """
    base = github('confirmedcode/lockdown-ios/master/LockdowniOS/')
    domains = set(enum_lines(base + urlname))
    return save_list(domains, fname, binary=True)
|
|
|
|
|
|
def customlist(fname):
    """Fetch the custom tracker list from the appchk.de API and save it.

    Returns the result of ``save_list`` for target name ``fname``.
    """
    # The 'list.txt' file could be read directly on this server, but going
    # through the API keeps the api separable from the website.
    url = 'https://appchk.de/api/v1/trackers/'
    domains = set(enum_lines(url))
    return save_list(domains, fname, binary=True)
|
|
|
|
|
|
def easylist(fname, urlname):
    """Extract host names from an EasyList-style filter file and save them.

    Only rules starting with ``||`` are considered. The host is the text
    after ``||`` up to the first ``^`` (or, if the rule has no ``^``, up to
    the first ``$``), cut at the first ``/``.

    Args:
        fname: Target name handed to ``save_list``.
        urlname: Path of the filter file inside the easylist repository.

    Returns the result of ``save_list``.
    """
    url = github('easylist/easylist/master/') + urlname
    hosts = set()
    for rule in enum_lines(url):
        if not rule.startswith(b'||'):
            continue
        rest = rule[2:]
        head, caret, _ = rest.partition(b'^')
        if not caret:
            # No '^' separator in the rule: fall back to the '$' options mark.
            head = rest.partition(b'$')[0]
        hosts.add(head.partition(b'/')[0])
    return save_list(hosts, fname, binary=True)
|
|
|
|
|
|
def lowe(fname):
    """Fetch the pgl.yoyo.org ad server list (hosts format) and save it.

    Lines starting with ``#`` are skipped. Every remaining line must consist
    of exactly two whitespace-separated fields; the second one is stored.
    Other lines are reported via ``mylib.err`` and skipped.

    Returns the result of ``save_list`` for target name ``fname``.
    """
    url = ('https://pgl.yoyo.org/adservers/serverlist.php'
           '?hostformat=hosts&mimetype=plaintext')
    hosts = set()
    for line in enum_lines(url, b'#'):
        fields = line.split()
        if len(fields) == 2:
            hosts.add(fields[1])
        else:
            mylib.err('tracker-list', 'Lowe: parsing error')
    return save_list(hosts, fname, binary=True)
|
|
|
|
|
|
def exodus(fname):
    """Extract tracker domains from the Exodus Privacy export and save them.

    Each tracker's ``network_signature`` is a ``|``-separated list of escaped
    patterns. ``\\.`` and ``\\-`` are unescaped, alternatives ending in ``/``
    or ``.`` are dropped, and a single leading backslash or dot is removed.

    Args:
        fname: Target name handed to ``save_list``.

    Returns the result of ``save_list`` (text mode).
    """
    url = 'https://etip.exodus-privacy.eu.org/trackers/export'
    data = mylib.download(url, isJSON=True)
    res = set()
    try:
        for entry in data['trackers']:
            net = entry['network_signature']
            if not net:
                continue
            net = net.replace('\\.', '.').replace('\\-', '-')
            for dom in net.split('|'):
                # An empty alternative ('a||b', trailing '|') would make the
                # index accesses below raise IndexError.
                if not dom or dom[-1] in '/.':
                    continue
                if dom[0] in '\\.':
                    dom = dom[1:]
                if dom:  # a lone backslash leaves nothing behind
                    res.add(dom)
    except KeyError as e:
        # Unexpected export layout: keep what was collected so far, but
        # report the problem instead of hiding it.
        mylib.err('tracker-list', 'Exodus: missing key ' + str(e))
    return save_list(res, fname, binary=False)
|
|
|
|
|
|
def combine_all(changes=('_',)):
    """Merge the per-source tracker lists into ``tracker_all.txt``.

    Args:
        changes: Collection of changes from the individual lists. The merge
            is skipped only when it is empty and the combined file already
            exists. The non-empty default forces a rebuild.

    Every domain is written reversed, one per line, in sorted order, so the
    file can be used for the binary-tree lookup. 'google.com' is left out on
    purpose.
    """
    final = mylib.path_root('src', 'lists', 'tracker_all.txt')
    if changes or not mylib.file_exists(final):
        print(' updating: tracker_all.txt')
    else:
        print(' no-change: tracker_all.txt')
        return

    res = set()
    # File names must match the ones the download functions save to; the
    # easyprivacy lists are written with a '.txt' suffix as well.
    for fname in ['custom.txt', 'lowe.txt',
                  'easylist.txt', 'easyprivacy.txt', 'easyprivacy_int.txt',
                  'exodus.txt', 'lockdown_clickbait.txt',
                  'lockdown_marketing.txt', 'lockdown_game_ads.txt']:
        for dom in mylib.read_list('tracker_' + fname):
            if dom == 'google.com':
                continue  # added by exodus, not a tracker per se
            res.add(dom[::-1])  # reverse for bintree lookup

    with open(final, 'w') as fp:
        for domain in sorted(res):
            fp.write(domain + '\n')
|
|
|
|
|
|
def process():
    """Download every tracker source, then rebuild the combined list.

    The sources are fetched in a fixed order; the changes each one reports
    are collected, handed to ``combine_all`` and finally returned.
    """
    print('downloading tracker domains ...')
    jobs = [
        (customlist, ('custom.txt',)),
        (lowe, ('lowe.txt',)),
        (easylist, ('easylist.txt',
                    'easylist/easylist_adservers.txt')),
        (easylist, ('easyprivacy.txt',
                    'easyprivacy/easyprivacy_trackingservers.txt')),
        (easylist, ('easyprivacy_int.txt',
                    'easyprivacy/easyprivacy_trackingservers_international.txt')),
        (exodus, ('exodus.txt',)),
        # (lockdown, ('lockdown_clickbait.txt', 'clickbait.txt')),
        # (lockdown, ('lockdown_marketing.txt', 'marketing.txt')),
        # (lockdown, ('lockdown_game_ads.txt', 'game_ads.txt')),
    ]
    changes = []
    for fetch, args in jobs:
        changes += fetch(*args)
    combine_all(changes)
    print('')
    return changes
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: download all sources and rebuild the lists.
    # combine_all()
    process()
|