From 6f215595b6e1aca82145e84d185ca3f0c5f4c3a8 Mon Sep 17 00:00:00 2001 From: relikd Date: Fri, 4 Sep 2020 08:59:23 +0200 Subject: [PATCH] Small fixes + load own list through API --- api/v1/contribute/index.php | 9 ++++--- api/v1/trackers/index.php | 4 +++ api/v1/trackers/list.txt | 7 +++++ out/static/style.css | 2 +- src/common_lib.py | 5 +++- src/html_bundle.py | 32 +++++++++++------------ src/html_index.py | 2 +- src/lists/tracker_all.txt | 18 ++++++++----- src/lists/tracker_easylist.txt | 1 + src/lists/tracker_lowe.txt | 13 +++++++--- src/main.py | 47 +++++++++++++++++++++++++--------- src/tracker_download.py | 40 ++++++++++++++++++----------- 12 files changed, 120 insertions(+), 60 deletions(-) create mode 100644 api/v1/trackers/index.php create mode 100644 api/v1/trackers/list.txt diff --git a/api/v1/contribute/index.php b/api/v1/contribute/index.php index 8b53717..f1a25d8 100644 --- a/api/v1/contribute/index.php +++ b/api/v1/contribute/index.php @@ -1,7 +1,11 @@ diff --git a/api/v1/trackers/list.txt b/api/v1/trackers/list.txt new file mode 100644 index 0000000..d4823ab --- /dev/null +++ b/api/v1/trackers/list.txt @@ -0,0 +1,7 @@ +adjust.com +app-measurement.com +criteo.com +firebasedynamiclinks-ipv4.googleapis.com +firebasedynamiclinks-ipv6.googleapis.com +ioam.de +userreport.com diff --git a/out/static/style.css b/out/static/style.css index d2640e9..8272833 100644 --- a/out/static/style.css +++ b/out/static/style.css @@ -99,7 +99,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; } #meta #appicon { float: right; } } #meta td:nth-child(2) { font-weight: bold } -#connections i { +#connections i:not(.empty) { font-size: 0.9em; font-style: normal; font-weight: normal; diff --git a/src/common_lib.py b/src/common_lib.py index 7d137a6..94f2f43 100755 --- a/src/common_lib.py +++ b/src/common_lib.py @@ -125,7 +125,10 @@ def mv(path, to, printOmitPrefix=None): def rm(path): - shutil.rmtree(path) + try: + shutil.rmtree(path) + except Exception: + pass def dir_exists(path): diff --git a/src/html_bundle.py b/src/html_bundle.py index 6896f65..196c660 100755 --- a/src/html_bundle.py +++ b/src/html_bundle.py @@ -2,10 +2,10 @@ import sys import common_lib as mylib -import matplotlib -import matplotlib.pyplot as plt +# import matplotlib +# import matplotlib.pyplot as plt -matplotlib.use('Agg') # disable interactive mode +# matplotlib.use('Agg') # disable interactive mode def sort_dict(count_dict): @@ -18,12 +18,12 @@ def sort_dict(count_dict): def gen_graph(count_dict, outfile, overwrite=False): if mylib.file_exists(outfile) and not overwrite: return - names, sizes = sort_dict(count_dict) - pie1, _ = plt.pie(sizes, labels=names) - plt.setp(pie1, width=0.5, edgecolor='white') - plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3) - plt.savefig(outfile, bbox_inches='tight', pad_inches=0) # transparent=True - plt.close() + # names, sizes = sort_dict(count_dict) + # pie1, _ = plt.pie(sizes, labels=names) + # plt.setp(pie1, width=0.5, edgecolor='white') + # plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3) + # plt.savefig(outfile, bbox_inches='tight', pad_inches=0) # transparent=True + # plt.close() def seconds_to_time(seconds): @@ -39,7 +39,7 @@ def gen_dom_tags(unsorted_dict, trackers=None): clss = ' class="bad"' if trackers and trackers[x] else '' title = x # if y == 1 else '{} ({})'.format(x, y) res.append('{}'.format(clss, title)) - return ' '.join(res) + return ' '.join(res) if len(res) > 0 else '– None –' def gen_html(bundle_id, obj): @@ -92,13 +92,11 @@ def make_bundle_out(bundle_id, forceGraphs=False): if not mylib.dir_exists(out_dir): needs_update_index = True mylib.mkdir(out_dir) - try: - gen_graph(json['total_subdom'], mylib.path_add(out_dir, 'sub.svg'), - overwrite=forceGraphs) - gen_graph(json['total_pardom'], mylib.path_add(out_dir, 'par.svg'), - overwrite=forceGraphs) - except KeyError: - mylib.err('bundle-generate-page', 'skip: ' + bundle_id) + + gen_graph(json['total_subdom'], mylib.path_add(out_dir, 'sub.svg'), + overwrite=forceGraphs) + gen_graph(json['total_pardom'], mylib.path_add(out_dir, 'par.svg'), + overwrite=forceGraphs) with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp: fp.write(gen_html(bundle_id, json)) diff --git a/src/html_index.py b/src/html_index.py index 90ee452..b257fae 100755 --- a/src/html_index.py +++ b/src/html_index.py @@ -71,7 +71,7 @@ def process(per_page=60): print(' {} apps'.format(apps_total)) print(' {} pages'.format(pages_total)) - apps_sorted = sorted(apps, key=lambda x: (x['name'], x['id'])) + apps_sorted = sorted(apps, key=lambda x: (x['name'].lower(), x['id'])) for x in range(1, pages_total + 1): start = (x - 1) * per_page batch = apps_sorted[start:start + per_page] diff --git a/src/lists/tracker_all.txt b/src/lists/tracker_all.txt index 809c684..5da3731 100644 --- a/src/lists/tracker_all.txt +++ b/src/lists/tracker_all.txt @@ -1,3 +1,4 @@ + 03.25.822.66 07.151.822.76 1010elpmaxe.lgp @@ -79,6 +80,7 @@ az.oc.loi.vrsda az.oc.maertskcilc az.oc.pmet.enozda az.oc.retnuocbew +az.oc.sisoa.xnepo az.oc.statslosba bulc.buhgninim bulc.esdrawdrahc @@ -91,6 +93,7 @@ bulc.vreskcilcww bulc.weniartsimer bup.ixp cc.afobeuy +cc.ameht.revresda cc.asu cc.cpeed cc.egdirbaidem @@ -692,6 +695,7 @@ if.yoswamonas.revresda if.yoswamonas.segamida il.qart is.8seivomx.da +is.enizagamytic.sda kcilc.baldael kcilc.gnitcelloc kcilc.revresda-dica @@ -1459,6 +1463,7 @@ moc.bxdaran moc.byveownxuu moc.c2b moc.ca-vres +moc.cabanna.sda moc.caisidarac.da moc.calsaweb moc.canrocobe @@ -1778,7 +1783,6 @@ moc.ecalptekramyalpsid moc.ecaps.sda moc.ecapsdaerahs moc.ecapsediw -moc.ecapsesitrevda moc.ecapsgnitallitnics moc.ecapsneek.da moc.ecapsrennab @@ -2291,6 +2295,7 @@ moc.evihnioc moc.evil-ci moc.evil.scitylana moc.evil8ebut.afi +moc.evila.sda moc.evilci moc.evilcitoragem.sbm moc.evilmi.hsacp @@ -3716,7 +3721,6 @@ moc.pazyeh.sda moc.pazyeh.x moc.pb-nepoderuces moc.pcotsezda -moc.pdclasr moc.pecxeferllun.yx moc.peehsskcits moc.peehsyllautca @@ -4693,8 +4697,6 @@ moc.sihtgnitsohsiohw.sda moc.sihtnuoc moc.silb moc.sipa5mv -moc.sipaelgoog.4vpi-sknilcimanydesaberif -moc.sipaelgoog.6vpi-sknilcimanydesaberif moc.sipaelgoog.ap-scirtemtneilc moc.sipaxm moc.sirales @@ -5384,7 +5386,6 @@ moc.tneitouqc moc.tnemecnerefer-elliev moc.tnemeergasuoitibma moc.tnemeganambjc -moc.tnemerusaem-ppa moc.tnemerusaemllac moc.tnemetatsrotaluclac moc.tnemges @@ -6074,6 +6075,7 @@ nv.ndcma nv.revresamitda nv.statsog oc.0202107732g +oc.0308155653g oc.0639439613g oc.0960813772g oc.1-d @@ -6659,6 +6661,7 @@ swen.emtrela ta.aweo ta.kcrt ta.m4da +ta.nim5.sda ta.ogz.cg ta.recnalab-da ta.rwar @@ -7354,7 +7357,6 @@ ts.jda ts.urtigid ts.urtigid.lexip tseb.gninim -tsoh.2nrevresda tsoh.edoc-ndc tsoh.sfscitats tsoh.sug @@ -7380,6 +7382,7 @@ ue.41x.stats ue.aade ue.aeraaidem ue.aidemci.rennab +ue.aidemgk.xnepo ue.alecag ue.cafsda ue.cilliga @@ -7439,6 +7442,7 @@ ur.1kvtn ur.2puxul ur.42tats ur.452aketpa.sda +ur.95bb.sda ur.9pnw3 ur.aifam.001pot ur.ariv @@ -7689,6 +7693,7 @@ zc.retnuocbew zc.rotkelfer.amalker zc.selasrebyc.sda zc.srv +zc.teniks.xnepo zc.tenorim.amalker zc.tenx.2sda zc.tenxin.rennab @@ -7710,6 +7715,7 @@ zc.xobonik.sda zc.xyn.sda zc.yaw.da zc.ygider.kinab +zc.yratyk.2xnepo zc.yrdom.sgn.saila.vomodyksec zc.yrennab zc.zw.da diff --git a/src/lists/tracker_easylist.txt b/src/lists/tracker_easylist.txt index 6012014..31e62eb 100644 --- a/src/lists/tracker_easylist.txt +++ b/src/lists/tracker_easylist.txt @@ -1193,6 +1193,7 @@ g2773180690.co g2971265025.co g3169349360.co g3367433695.co +g3565518030.co gaconnector.com gameanalytics.com gammachug.com diff --git a/src/lists/tracker_lowe.txt b/src/lists/tracker_lowe.txt index 9fc8242..6d843d7 100644 --- a/src/lists/tracker_lowe.txt +++ b/src/lists/tracker_lowe.txt @@ -377,6 +377,7 @@ ads-twitter.com ads.365.mk ads.4tube.com ads.5ci.lt +ads.5min.at ads.73dpi.com ads.aavv.com ads.abovetopsecret.com @@ -393,9 +394,11 @@ ads.adultfriendfinder.com ads.advance.net ads.adverline.com ads.affiliates.match.com +ads.alive.com ads.alt.com ads.amdmb.com ads.amigos.com +ads.annabac.com ads.aol.co.uk ads.apn.co.nz ads.appsgeyser.com @@ -406,6 +409,7 @@ ads.asiafriendfinder.com ads.ask.com ads.aspalliance.com ads.avazu.net +ads.bb59.ru ads.belointeractive.com ads.betfair.com ads.bigchurch.com @@ -433,6 +437,7 @@ ads.cc-dt.com ads.centraliprom.com ads.channel4.com ads.cheabit.com +ads.citymagazine.si ads.clasificadox.com ads.clearchannel.com ads.co.com @@ -839,6 +844,7 @@ adserver.sanomawsoy.fi adserver.sciflicks.com adserver.sharewareonline.com adserver.spankaway.com +adserver.thema.cc adserver.theonering.net adserver.twitpic.com adserver.viagogo.com @@ -849,7 +855,6 @@ adserver1-images.backbeatmedia.com adserver1.backbeatmedia.com adserver1.mindshare.de adserver2.mindshare.de -adservern2.host adserverplus.com adserverpub.com adserversolutions.com @@ -944,7 +949,6 @@ advertise.com advertiseireland.com advertisementafterthought.com advertiserurl.com -advertisespace.com advertising.com advertisingbanners.com advertisingbox.com @@ -2491,8 +2495,12 @@ openadsnetwork.com opentag-stats.qubit.com openx.actvtech.com openx.angelsgroup.org.uk +openx.aosis.co.za openx.cairo360.com +openx.kgmedia.eu +openx.skinet.cz openx.smcaen.fr +openx2.kytary.cz operationkettle.com opienetwork.com opmnstr.com @@ -2827,7 +2835,6 @@ rotabanner.com roughroll.com roxr.net rs6.net -rsalcdp.com rta.dailymail.co.uk rtb.gumgum.com rtb.openx.net diff --git a/src/main.py b/src/main.py index 56c63f6..0fb0552 100755 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os +import sys import common_lib as mylib import bundle_combine import bundle_download @@ -10,6 +11,18 @@ import html_bundle import tracker_download +def print_usage_and_exit(): + mylib.usage(__file__, 'command [params]') + print(''' + import | check '_in' folder for new apps + tracker | update tracking domains + icons | check & download missing icons + run [bundle_id] [...] | recombine and rebuild apps + del [bundle_id] [...] | remove app and rebuild index +''') + exit(0) + + def del_id(bundle_ids): if bundle_ids == ['*']: bundle_ids = list(mylib.enum_appids()) @@ -57,15 +70,25 @@ def tracker_update(): combine_and_update(['*'], where=new_trackers) -def process(): - # del_id(['_manually']) - import_update() - # tracker_update() - # tracker_download.combine_all('x') - # combine_and_update(['*']) # where=['test.com'] - # if bundle_download.download_missing_icons(force=False): - # html_index.process() - # html_index.process() - - -process() +if __name__ == '__main__': + args = sys.argv[1:] + if len(args) == 0: + print_usage_and_exit() + cmd = args[0] + params = args[1:] + if cmd == 'import': + import_update() + elif cmd == 'del': + if len(params) == 0: + print_usage_and_exit() + del_id(params) # ['_manually'] + elif cmd == 'run': + if len(params) == 0: + print_usage_and_exit() + combine_and_update(params) # ['*'], where=['test.com'] + elif cmd == 'icons': + if bundle_download.download_missing_icons(force=False): + html_index.process() + elif cmd == 'tracker': + tracker_update() + # tracker_download.combine_all('x') diff --git a/src/tracker_download.py b/src/tracker_download.py index 0c1053c..1f8f2a3 100755 --- a/src/tracker_download.py +++ b/src/tracker_download.py @@ -16,8 +16,8 @@ known_trackers = None def save_list(result_set, fname, binary=False): if not result_set: - return False - out = mylib.path_root('src', 'lists', fname) + return [] + out = mylib.path_root('src', 'lists', 'tracker_' + fname) with open(out + '_tmp', 'wb' if binary else 'w') as fp: end = b'\n' if binary else '\n' for domain in sorted(result_set): @@ -37,11 +37,14 @@ def save_list(result_set, fname, binary=False): def enum_lines(url, ignore=None): - whole = mylib.download(url) - for line in whole.split(b'\n'): - if not line or ignore and line.startswith(ignore): - continue - yield line + try: + whole = mylib.download(url) + for line in whole.split(b'\n'): + if not line or ignore and line.startswith(ignore): + continue + yield line + except Exception as e: + mylib.err('tracker-download', str(e) + ' in ' + url) def github(path): @@ -53,6 +56,13 @@ def lockdown(fname, urlname): return save_list(set(enum_lines(url)), fname, binary=True) +def customlist(fname): + # We could access the 'list.txt' file directly on this server + # However, we can't separate the api from the website then + url = 'https://appchk.de/api/v1/trackers/' + return save_list(set(enum_lines(url)), fname, binary=True) + + def easylist(fname, urlname): url = github('easylist/easylist/master/easyprivacy/') + urlname res = set() @@ -130,15 +140,15 @@ def combine_all(changes): def process(): print('downloading tracker domains ...') changes = [] - changes += lowe('tracker_lowe.txt') - changes += easylist('tracker_easylist.txt', - 'easyprivacy_trackingservers.txt') - changes += easylist('tracker_easylist_int.txt', + changes += customlist('custom.txt') + changes += lowe('lowe.txt') + changes += easylist('easylist.txt', 'easyprivacy_trackingservers.txt') + changes += easylist('easylist_int.txt', 'easyprivacy_trackingservers_international.txt') - changes += exodus('tracker_exodus.txt') - # changes += lockdown('tracker_lockdown_clickbait.txt', 'clickbait.txt') - # changes += lockdown('tracker_lockdown_marketing.txt', 'marketing.txt') - # changes += lockdown('tracker_lockdown_game_ads.txt', 'game_ads.txt') + changes += exodus('exodus.txt') + # changes += lockdown('lockdown_clickbait.txt', 'clickbait.txt') + # changes += lockdown('lockdown_marketing.txt', 'marketing.txt') + # changes += lockdown('lockdown_game_ads.txt', 'game_ads.txt') combine_all(changes) print('') return changes