Small fixes + load own list through API

This commit is contained in:
relikd
2020-09-04 08:59:23 +02:00
parent 1d731e709f
commit 6f215595b6
12 changed files with 120 additions and 60 deletions

View File

@@ -1,7 +1,11 @@
<?php <?php
// ini_set('display_errors', 1);
// ini_set('display_startup_errors', 1);
// error_reporting(E_ALL);
$proj_root = dirname(dirname(dirname(__DIR__))); $proj_root = dirname(dirname(dirname(__DIR__)));
function is_valid_bundle($bundle_id) { function is_valid_bundle($bundle_id) {
# check valid bundle id, same regex as in `common_lib.py`
return preg_match('/^[A-Za-z0-9\.\-]{1,155}$/', $bundle_id); return preg_match('/^[A-Za-z0-9\.\-]{1,155}$/', $bundle_id);
} }
@@ -49,10 +53,7 @@ function make_output($msg, $url=null, $when=null, $key=null) {
} }
function response_success($bundle_id, $key) { function response_success($bundle_id, $key) {
# check valid bundle id, same regex as in `common_lib.py` $url = $bundle_id ? 'https://appchk.de/app/'.$bundle_id.'/index.html' : null;
if ($bundle_id) {
$url = 'https://appchk.de/app/'.$bundle_id.'/index.html';
}
# next update will be in ... X seconds (up to 1 min) # next update will be in ... X seconds (up to 1 min)
make_output('ok', $url, ceil(time()/120)*120 - time(), $key); make_output('ok', $url, ceil(time()/120)*120 - time(), $key);
} }

View File

@@ -0,0 +1,4 @@
<?php
header("Content-Type: text/plain");
readfile("list.txt");
?>

7
api/v1/trackers/list.txt Normal file
View File

@@ -0,0 +1,7 @@
adjust.com
app-measurement.com
criteo.com
firebasedynamiclinks-ipv4.googleapis.com
firebasedynamiclinks-ipv6.googleapis.com
ioam.de
userreport.com

View File

@@ -99,7 +99,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; }
#meta #appicon { float: right; } #meta #appicon { float: right; }
} }
#meta td:nth-child(2) { font-weight: bold } #meta td:nth-child(2) { font-weight: bold }
#connections i { #connections i:not(.empty) {
font-size: 0.9em; font-size: 0.9em;
font-style: normal; font-style: normal;
font-weight: normal; font-weight: normal;

View File

@@ -125,7 +125,10 @@ def mv(path, to, printOmitPrefix=None):
def rm(path): def rm(path):
shutil.rmtree(path) try:
shutil.rmtree(path)
except Exception:
pass
def dir_exists(path): def dir_exists(path):

View File

@@ -2,10 +2,10 @@
import sys import sys
import common_lib as mylib import common_lib as mylib
import matplotlib # import matplotlib
import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
matplotlib.use('Agg') # disable interactive mode # matplotlib.use('Agg') # disable interactive mode
def sort_dict(count_dict): def sort_dict(count_dict):
@@ -18,12 +18,12 @@ def sort_dict(count_dict):
def gen_graph(count_dict, outfile, overwrite=False): def gen_graph(count_dict, outfile, overwrite=False):
if mylib.file_exists(outfile) and not overwrite: if mylib.file_exists(outfile) and not overwrite:
return return
names, sizes = sort_dict(count_dict) # names, sizes = sort_dict(count_dict)
pie1, _ = plt.pie(sizes, labels=names) # pie1, _ = plt.pie(sizes, labels=names)
plt.setp(pie1, width=0.5, edgecolor='white') # plt.setp(pie1, width=0.5, edgecolor='white')
plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3) # plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3)
plt.savefig(outfile, bbox_inches='tight', pad_inches=0) # transparent=True # plt.savefig(outfile, bbox_inches='tight', pad_inches=0) # transparent=True
plt.close() # plt.close()
def seconds_to_time(seconds): def seconds_to_time(seconds):
@@ -39,7 +39,7 @@ def gen_dom_tags(unsorted_dict, trackers=None):
clss = ' class="bad"' if trackers and trackers[x] else '' clss = ' class="bad"' if trackers and trackers[x] else ''
title = x # if y == 1 else '{} ({})'.format(x, y) title = x # if y == 1 else '{} ({})'.format(x, y)
res.append('<i{}>{}</i>'.format(clss, title)) res.append('<i{}>{}</i>'.format(clss, title))
return ' '.join(res) return ' '.join(res) if len(res) > 0 else '<i class="empty"> None </i>'
def gen_html(bundle_id, obj): def gen_html(bundle_id, obj):
@@ -92,13 +92,11 @@ def make_bundle_out(bundle_id, forceGraphs=False):
if not mylib.dir_exists(out_dir): if not mylib.dir_exists(out_dir):
needs_update_index = True needs_update_index = True
mylib.mkdir(out_dir) mylib.mkdir(out_dir)
try:
gen_graph(json['total_subdom'], mylib.path_add(out_dir, 'sub.svg'), gen_graph(json['total_subdom'], mylib.path_add(out_dir, 'sub.svg'),
overwrite=forceGraphs) overwrite=forceGraphs)
gen_graph(json['total_pardom'], mylib.path_add(out_dir, 'par.svg'), gen_graph(json['total_pardom'], mylib.path_add(out_dir, 'par.svg'),
overwrite=forceGraphs) overwrite=forceGraphs)
except KeyError:
mylib.err('bundle-generate-page', 'skip: ' + bundle_id)
with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp: with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp:
fp.write(gen_html(bundle_id, json)) fp.write(gen_html(bundle_id, json))

View File

@@ -71,7 +71,7 @@ def process(per_page=60):
print(' {} apps'.format(apps_total)) print(' {} apps'.format(apps_total))
print(' {} pages'.format(pages_total)) print(' {} pages'.format(pages_total))
apps_sorted = sorted(apps, key=lambda x: (x['name'], x['id'])) apps_sorted = sorted(apps, key=lambda x: (x['name'].lower(), x['id']))
for x in range(1, pages_total + 1): for x in range(1, pages_total + 1):
start = (x - 1) * per_page start = (x - 1) * per_page
batch = apps_sorted[start:start + per_page] batch = apps_sorted[start:start + per_page]

View File

@@ -1,3 +1,4 @@
03.25.822.66 03.25.822.66
07.151.822.76 07.151.822.76
1010elpmaxe.lgp 1010elpmaxe.lgp
@@ -79,6 +80,7 @@ az.oc.loi.vrsda
az.oc.maertskcilc az.oc.maertskcilc
az.oc.pmet.enozda az.oc.pmet.enozda
az.oc.retnuocbew az.oc.retnuocbew
az.oc.sisoa.xnepo
az.oc.statslosba az.oc.statslosba
bulc.buhgninim bulc.buhgninim
bulc.esdrawdrahc bulc.esdrawdrahc
@@ -91,6 +93,7 @@ bulc.vreskcilcww
bulc.weniartsimer bulc.weniartsimer
bup.ixp bup.ixp
cc.afobeuy cc.afobeuy
cc.ameht.revresda
cc.asu cc.asu
cc.cpeed cc.cpeed
cc.egdirbaidem cc.egdirbaidem
@@ -692,6 +695,7 @@ if.yoswamonas.revresda
if.yoswamonas.segamida if.yoswamonas.segamida
il.qart il.qart
is.8seivomx.da is.8seivomx.da
is.enizagamytic.sda
kcilc.baldael kcilc.baldael
kcilc.gnitcelloc kcilc.gnitcelloc
kcilc.revresda-dica kcilc.revresda-dica
@@ -1459,6 +1463,7 @@ moc.bxdaran
moc.byveownxuu moc.byveownxuu
moc.c2b moc.c2b
moc.ca-vres moc.ca-vres
moc.cabanna.sda
moc.caisidarac.da moc.caisidarac.da
moc.calsaweb moc.calsaweb
moc.canrocobe moc.canrocobe
@@ -1778,7 +1783,6 @@ moc.ecalptekramyalpsid
moc.ecaps.sda moc.ecaps.sda
moc.ecapsdaerahs moc.ecapsdaerahs
moc.ecapsediw moc.ecapsediw
moc.ecapsesitrevda
moc.ecapsgnitallitnics moc.ecapsgnitallitnics
moc.ecapsneek.da moc.ecapsneek.da
moc.ecapsrennab moc.ecapsrennab
@@ -2291,6 +2295,7 @@ moc.evihnioc
moc.evil-ci moc.evil-ci
moc.evil.scitylana moc.evil.scitylana
moc.evil8ebut.afi moc.evil8ebut.afi
moc.evila.sda
moc.evilci moc.evilci
moc.evilcitoragem.sbm moc.evilcitoragem.sbm
moc.evilmi.hsacp moc.evilmi.hsacp
@@ -3716,7 +3721,6 @@ moc.pazyeh.sda
moc.pazyeh.x moc.pazyeh.x
moc.pb-nepoderuces moc.pb-nepoderuces
moc.pcotsezda moc.pcotsezda
moc.pdclasr
moc.pecxeferllun.yx moc.pecxeferllun.yx
moc.peehsskcits moc.peehsskcits
moc.peehsyllautca moc.peehsyllautca
@@ -4693,8 +4697,6 @@ moc.sihtgnitsohsiohw.sda
moc.sihtnuoc moc.sihtnuoc
moc.silb moc.silb
moc.sipa5mv moc.sipa5mv
moc.sipaelgoog.4vpi-sknilcimanydesaberif
moc.sipaelgoog.6vpi-sknilcimanydesaberif
moc.sipaelgoog.ap-scirtemtneilc moc.sipaelgoog.ap-scirtemtneilc
moc.sipaxm moc.sipaxm
moc.sirales moc.sirales
@@ -5384,7 +5386,6 @@ moc.tneitouqc
moc.tnemecnerefer-elliev moc.tnemecnerefer-elliev
moc.tnemeergasuoitibma moc.tnemeergasuoitibma
moc.tnemeganambjc moc.tnemeganambjc
moc.tnemerusaem-ppa
moc.tnemerusaemllac moc.tnemerusaemllac
moc.tnemetatsrotaluclac moc.tnemetatsrotaluclac
moc.tnemges moc.tnemges
@@ -6074,6 +6075,7 @@ nv.ndcma
nv.revresamitda nv.revresamitda
nv.statsog nv.statsog
oc.0202107732g oc.0202107732g
oc.0308155653g
oc.0639439613g oc.0639439613g
oc.0960813772g oc.0960813772g
oc.1-d oc.1-d
@@ -6659,6 +6661,7 @@ swen.emtrela
ta.aweo ta.aweo
ta.kcrt ta.kcrt
ta.m4da ta.m4da
ta.nim5.sda
ta.ogz.cg ta.ogz.cg
ta.recnalab-da ta.recnalab-da
ta.rwar ta.rwar
@@ -7354,7 +7357,6 @@ ts.jda
ts.urtigid ts.urtigid
ts.urtigid.lexip ts.urtigid.lexip
tseb.gninim tseb.gninim
tsoh.2nrevresda
tsoh.edoc-ndc tsoh.edoc-ndc
tsoh.sfscitats tsoh.sfscitats
tsoh.sug tsoh.sug
@@ -7380,6 +7382,7 @@ ue.41x.stats
ue.aade ue.aade
ue.aeraaidem ue.aeraaidem
ue.aidemci.rennab ue.aidemci.rennab
ue.aidemgk.xnepo
ue.alecag ue.alecag
ue.cafsda ue.cafsda
ue.cilliga ue.cilliga
@@ -7439,6 +7442,7 @@ ur.1kvtn
ur.2puxul ur.2puxul
ur.42tats ur.42tats
ur.452aketpa.sda ur.452aketpa.sda
ur.95bb.sda
ur.9pnw3 ur.9pnw3
ur.aifam.001pot ur.aifam.001pot
ur.ariv ur.ariv
@@ -7689,6 +7693,7 @@ zc.retnuocbew
zc.rotkelfer.amalker zc.rotkelfer.amalker
zc.selasrebyc.sda zc.selasrebyc.sda
zc.srv zc.srv
zc.teniks.xnepo
zc.tenorim.amalker zc.tenorim.amalker
zc.tenx.2sda zc.tenx.2sda
zc.tenxin.rennab zc.tenxin.rennab
@@ -7710,6 +7715,7 @@ zc.xobonik.sda
zc.xyn.sda zc.xyn.sda
zc.yaw.da zc.yaw.da
zc.ygider.kinab zc.ygider.kinab
zc.yratyk.2xnepo
zc.yrdom.sgn.saila.vomodyksec zc.yrdom.sgn.saila.vomodyksec
zc.yrennab zc.yrennab
zc.zw.da zc.zw.da

View File

@@ -1193,6 +1193,7 @@ g2773180690.co
g2971265025.co g2971265025.co
g3169349360.co g3169349360.co
g3367433695.co g3367433695.co
g3565518030.co
gaconnector.com gaconnector.com
gameanalytics.com gameanalytics.com
gammachug.com gammachug.com

View File

@@ -377,6 +377,7 @@ ads-twitter.com
ads.365.mk ads.365.mk
ads.4tube.com ads.4tube.com
ads.5ci.lt ads.5ci.lt
ads.5min.at
ads.73dpi.com ads.73dpi.com
ads.aavv.com ads.aavv.com
ads.abovetopsecret.com ads.abovetopsecret.com
@@ -393,9 +394,11 @@ ads.adultfriendfinder.com
ads.advance.net ads.advance.net
ads.adverline.com ads.adverline.com
ads.affiliates.match.com ads.affiliates.match.com
ads.alive.com
ads.alt.com ads.alt.com
ads.amdmb.com ads.amdmb.com
ads.amigos.com ads.amigos.com
ads.annabac.com
ads.aol.co.uk ads.aol.co.uk
ads.apn.co.nz ads.apn.co.nz
ads.appsgeyser.com ads.appsgeyser.com
@@ -406,6 +409,7 @@ ads.asiafriendfinder.com
ads.ask.com ads.ask.com
ads.aspalliance.com ads.aspalliance.com
ads.avazu.net ads.avazu.net
ads.bb59.ru
ads.belointeractive.com ads.belointeractive.com
ads.betfair.com ads.betfair.com
ads.bigchurch.com ads.bigchurch.com
@@ -433,6 +437,7 @@ ads.cc-dt.com
ads.centraliprom.com ads.centraliprom.com
ads.channel4.com ads.channel4.com
ads.cheabit.com ads.cheabit.com
ads.citymagazine.si
ads.clasificadox.com ads.clasificadox.com
ads.clearchannel.com ads.clearchannel.com
ads.co.com ads.co.com
@@ -839,6 +844,7 @@ adserver.sanomawsoy.fi
adserver.sciflicks.com adserver.sciflicks.com
adserver.sharewareonline.com adserver.sharewareonline.com
adserver.spankaway.com adserver.spankaway.com
adserver.thema.cc
adserver.theonering.net adserver.theonering.net
adserver.twitpic.com adserver.twitpic.com
adserver.viagogo.com adserver.viagogo.com
@@ -849,7 +855,6 @@ adserver1-images.backbeatmedia.com
adserver1.backbeatmedia.com adserver1.backbeatmedia.com
adserver1.mindshare.de adserver1.mindshare.de
adserver2.mindshare.de adserver2.mindshare.de
adservern2.host
adserverplus.com adserverplus.com
adserverpub.com adserverpub.com
adserversolutions.com adserversolutions.com
@@ -944,7 +949,6 @@ advertise.com
advertiseireland.com advertiseireland.com
advertisementafterthought.com advertisementafterthought.com
advertiserurl.com advertiserurl.com
advertisespace.com
advertising.com advertising.com
advertisingbanners.com advertisingbanners.com
advertisingbox.com advertisingbox.com
@@ -2491,8 +2495,12 @@ openadsnetwork.com
opentag-stats.qubit.com opentag-stats.qubit.com
openx.actvtech.com openx.actvtech.com
openx.angelsgroup.org.uk openx.angelsgroup.org.uk
openx.aosis.co.za
openx.cairo360.com openx.cairo360.com
openx.kgmedia.eu
openx.skinet.cz
openx.smcaen.fr openx.smcaen.fr
openx2.kytary.cz
operationkettle.com operationkettle.com
opienetwork.com opienetwork.com
opmnstr.com opmnstr.com
@@ -2827,7 +2835,6 @@ rotabanner.com
roughroll.com roughroll.com
roxr.net roxr.net
rs6.net rs6.net
rsalcdp.com
rta.dailymail.co.uk rta.dailymail.co.uk
rtb.gumgum.com rtb.gumgum.com
rtb.openx.net rtb.openx.net

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os import os
import sys
import common_lib as mylib import common_lib as mylib
import bundle_combine import bundle_combine
import bundle_download import bundle_download
@@ -10,6 +11,18 @@ import html_bundle
import tracker_download import tracker_download
def print_usage_and_exit():
mylib.usage(__file__, 'command [params]')
print('''
import | check '_in' folder for new apps
tracker | update tracking domains
icons | check & download missing icons
run [bundle_id] [...] | recombine and rebuild apps
del [bundle_id] [...] | remove app and rebuild index
''')
exit(0)
def del_id(bundle_ids): def del_id(bundle_ids):
if bundle_ids == ['*']: if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_appids()) bundle_ids = list(mylib.enum_appids())
@@ -57,15 +70,25 @@ def tracker_update():
combine_and_update(['*'], where=new_trackers) combine_and_update(['*'], where=new_trackers)
def process(): if __name__ == '__main__':
# del_id(['_manually']) args = sys.argv[1:]
import_update() if len(args) == 0:
# tracker_update() print_usage_and_exit()
# tracker_download.combine_all('x') cmd = args[0]
# combine_and_update(['*']) # where=['test.com'] params = args[1:]
# if bundle_download.download_missing_icons(force=False): if cmd == 'import':
# html_index.process() import_update()
# html_index.process() elif cmd == 'del':
if len(params) == 0:
print_usage_and_exit()
process() del_id(params) # ['_manually']
elif cmd == 'run':
if len(params) == 0:
print_usage_and_exit()
combine_and_update(params) # ['*'], where=['test.com']
elif cmd == 'icons':
if bundle_download.download_missing_icons(force=False):
html_index.process()
elif cmd == 'tracker':
tracker_update()
# tracker_download.combine_all('x')

View File

@@ -16,8 +16,8 @@ known_trackers = None
def save_list(result_set, fname, binary=False): def save_list(result_set, fname, binary=False):
if not result_set: if not result_set:
return False return []
out = mylib.path_root('src', 'lists', fname) out = mylib.path_root('src', 'lists', 'tracker_' + fname)
with open(out + '_tmp', 'wb' if binary else 'w') as fp: with open(out + '_tmp', 'wb' if binary else 'w') as fp:
end = b'\n' if binary else '\n' end = b'\n' if binary else '\n'
for domain in sorted(result_set): for domain in sorted(result_set):
@@ -37,11 +37,14 @@ def save_list(result_set, fname, binary=False):
def enum_lines(url, ignore=None): def enum_lines(url, ignore=None):
whole = mylib.download(url) try:
for line in whole.split(b'\n'): whole = mylib.download(url)
if not line or ignore and line.startswith(ignore): for line in whole.split(b'\n'):
continue if not line or ignore and line.startswith(ignore):
yield line continue
yield line
except Exception as e:
mylib.err('tracker-download', str(e) + ' in ' + url)
def github(path): def github(path):
@@ -53,6 +56,13 @@ def lockdown(fname, urlname):
return save_list(set(enum_lines(url)), fname, binary=True) return save_list(set(enum_lines(url)), fname, binary=True)
def customlist(fname):
# We could access the 'list.txt' file directly on this server
# However, we can't separate the api from the website then
url = 'https://appchk.de/api/v1/trackers/'
return save_list(set(enum_lines(url)), fname, binary=True)
def easylist(fname, urlname): def easylist(fname, urlname):
url = github('easylist/easylist/master/easyprivacy/') + urlname url = github('easylist/easylist/master/easyprivacy/') + urlname
res = set() res = set()
@@ -130,15 +140,15 @@ def combine_all(changes):
def process(): def process():
print('downloading tracker domains ...') print('downloading tracker domains ...')
changes = [] changes = []
changes += lowe('tracker_lowe.txt') changes += customlist('custom.txt')
changes += easylist('tracker_easylist.txt', changes += lowe('lowe.txt')
'easyprivacy_trackingservers.txt') changes += easylist('easylist.txt', 'easyprivacy_trackingservers.txt')
changes += easylist('tracker_easylist_int.txt', changes += easylist('easylist_int.txt',
'easyprivacy_trackingservers_international.txt') 'easyprivacy_trackingservers_international.txt')
changes += exodus('tracker_exodus.txt') changes += exodus('exodus.txt')
# changes += lockdown('tracker_lockdown_clickbait.txt', 'clickbait.txt') # changes += lockdown('lockdown_clickbait.txt', 'clickbait.txt')
# changes += lockdown('tracker_lockdown_marketing.txt', 'marketing.txt') # changes += lockdown('lockdown_marketing.txt', 'marketing.txt')
# changes += lockdown('tracker_lockdown_game_ads.txt', 'game_ads.txt') # changes += lockdown('lockdown_game_ads.txt', 'game_ads.txt')
combine_all(changes) combine_all(changes)
print('') print('')
return changes return changes