Small fixes + load own list through API

This commit is contained in:
relikd
2020-09-04 08:59:23 +02:00
parent 1d731e709f
commit 6f215595b6
12 changed files with 120 additions and 60 deletions

View File

@@ -1,7 +1,11 @@
<?php
// ini_set('display_errors', 1);
// ini_set('display_startup_errors', 1);
// error_reporting(E_ALL);
$proj_root = dirname(dirname(dirname(__DIR__)));
function is_valid_bundle($bundle_id) {
    # Check for a valid bundle id: 1-155 characters out of letters, digits,
    # '.' and '-'. Same character class and length limit as the regex in
    # `common_lib.py`.
    # The D modifier makes `$` match only at the very end of the subject.
    # Without it, PCRE also accepts a position right before a trailing
    # newline, so "com.example.app\n" would pass validation.
    # Returns the preg_match result: 1 on match, 0 on no match, false on error.
    return preg_match('/^[A-Za-z0-9\.\-]{1,155}$/D', $bundle_id);
}
@@ -49,10 +53,7 @@ function make_output($msg, $url=null, $when=null, $key=null) {
}
function response_success($bundle_id, $key) {
    # Emit an 'ok' response through make_output().
    #   $bundle_id  used to build the link to the app's result page;
    #               when falsy, no link is sent ($url is null)
    #   $key        passed through to make_output() unchanged
    $url = $bundle_id ? 'https://appchk.de/app/'.$bundle_id.'/index.html' : null;
    # next update will be in ... X seconds: the distance to the next
    # multiple of 120 s on the clock, i.e. a value in 0..119.
    # time() is read once so both terms refer to the same instant; two
    # separate calls can straddle a second boundary and yield -1.
    $now = time();
    make_output('ok', $url, ceil($now / 120) * 120 - $now, $key);
}

View File

@@ -0,0 +1,4 @@
<?php
# Serve the custom tracker domain list as plain text.
header("Content-Type: text/plain");
# Resolve the file relative to this script instead of the current working
# directory, so the endpoint also works when the script is included or run
# from elsewhere.
readfile(__DIR__ . "/list.txt");
# No closing PHP tag on purpose: nothing after the list can leak into the
# response body.

7
api/v1/trackers/list.txt Normal file
View File

@@ -0,0 +1,7 @@
adjust.com
app-measurement.com
criteo.com
firebasedynamiclinks-ipv4.googleapis.com
firebasedynamiclinks-ipv6.googleapis.com
ioam.de
userreport.com

View File

@@ -99,7 +99,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; }
#meta #appicon { float: right; }
}
#meta td:nth-child(2) { font-weight: bold }
#connections i {
#connections i:not(.empty) {
font-size: 0.9em;
font-style: normal;
font-weight: normal;

View File

@@ -125,7 +125,10 @@ def mv(path, to, printOmitPrefix=None):
def rm(path):
    """Recursively delete the directory tree at `path`, best effort.

    Filesystem failures (path missing, not a directory, permission denied,
    ...) are ignored on purpose, so callers can use this as "make sure it
    is gone". Only OSError is swallowed: anything else, e.g. a TypeError
    caused by passing a wrong argument, is a programming error and is
    allowed to propagate instead of being hidden.
    """
    try:
        shutil.rmtree(path)
    except OSError:
        pass
def dir_exists(path):

View File

@@ -2,10 +2,10 @@
import sys
import common_lib as mylib
import matplotlib
import matplotlib.pyplot as plt
# import matplotlib
# import matplotlib.pyplot as plt
matplotlib.use('Agg') # disable interactive mode
# matplotlib.use('Agg') # disable interactive mode
def sort_dict(count_dict):
@@ -18,12 +18,12 @@ def sort_dict(count_dict):
def gen_graph(count_dict, outfile, overwrite=False):
if mylib.file_exists(outfile) and not overwrite:
return
names, sizes = sort_dict(count_dict)
pie1, _ = plt.pie(sizes, labels=names)
plt.setp(pie1, width=0.5, edgecolor='white')
plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3)
plt.savefig(outfile, bbox_inches='tight', pad_inches=0) # transparent=True
plt.close()
# names, sizes = sort_dict(count_dict)
# pie1, _ = plt.pie(sizes, labels=names)
# plt.setp(pie1, width=0.5, edgecolor='white')
# plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3)
# plt.savefig(outfile, bbox_inches='tight', pad_inches=0) # transparent=True
# plt.close()
def seconds_to_time(seconds):
@@ -39,7 +39,7 @@ def gen_dom_tags(unsorted_dict, trackers=None):
clss = ' class="bad"' if trackers and trackers[x] else ''
title = x # if y == 1 else '{} ({})'.format(x, y)
res.append('<i{}>{}</i>'.format(clss, title))
return ' '.join(res)
return ' '.join(res) if len(res) > 0 else '<i class="empty"> None </i>'
def gen_html(bundle_id, obj):
@@ -92,13 +92,11 @@ def make_bundle_out(bundle_id, forceGraphs=False):
if not mylib.dir_exists(out_dir):
needs_update_index = True
mylib.mkdir(out_dir)
try:
gen_graph(json['total_subdom'], mylib.path_add(out_dir, 'sub.svg'),
overwrite=forceGraphs)
gen_graph(json['total_pardom'], mylib.path_add(out_dir, 'par.svg'),
overwrite=forceGraphs)
except KeyError:
mylib.err('bundle-generate-page', 'skip: ' + bundle_id)
with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp:
fp.write(gen_html(bundle_id, json))

View File

@@ -71,7 +71,7 @@ def process(per_page=60):
print(' {} apps'.format(apps_total))
print(' {} pages'.format(pages_total))
apps_sorted = sorted(apps, key=lambda x: (x['name'], x['id']))
apps_sorted = sorted(apps, key=lambda x: (x['name'].lower(), x['id']))
for x in range(1, pages_total + 1):
start = (x - 1) * per_page
batch = apps_sorted[start:start + per_page]

View File

@@ -1,3 +1,4 @@
03.25.822.66
07.151.822.76
1010elpmaxe.lgp
@@ -79,6 +80,7 @@ az.oc.loi.vrsda
az.oc.maertskcilc
az.oc.pmet.enozda
az.oc.retnuocbew
az.oc.sisoa.xnepo
az.oc.statslosba
bulc.buhgninim
bulc.esdrawdrahc
@@ -91,6 +93,7 @@ bulc.vreskcilcww
bulc.weniartsimer
bup.ixp
cc.afobeuy
cc.ameht.revresda
cc.asu
cc.cpeed
cc.egdirbaidem
@@ -692,6 +695,7 @@ if.yoswamonas.revresda
if.yoswamonas.segamida
il.qart
is.8seivomx.da
is.enizagamytic.sda
kcilc.baldael
kcilc.gnitcelloc
kcilc.revresda-dica
@@ -1459,6 +1463,7 @@ moc.bxdaran
moc.byveownxuu
moc.c2b
moc.ca-vres
moc.cabanna.sda
moc.caisidarac.da
moc.calsaweb
moc.canrocobe
@@ -1778,7 +1783,6 @@ moc.ecalptekramyalpsid
moc.ecaps.sda
moc.ecapsdaerahs
moc.ecapsediw
moc.ecapsesitrevda
moc.ecapsgnitallitnics
moc.ecapsneek.da
moc.ecapsrennab
@@ -2291,6 +2295,7 @@ moc.evihnioc
moc.evil-ci
moc.evil.scitylana
moc.evil8ebut.afi
moc.evila.sda
moc.evilci
moc.evilcitoragem.sbm
moc.evilmi.hsacp
@@ -3716,7 +3721,6 @@ moc.pazyeh.sda
moc.pazyeh.x
moc.pb-nepoderuces
moc.pcotsezda
moc.pdclasr
moc.pecxeferllun.yx
moc.peehsskcits
moc.peehsyllautca
@@ -4693,8 +4697,6 @@ moc.sihtgnitsohsiohw.sda
moc.sihtnuoc
moc.silb
moc.sipa5mv
moc.sipaelgoog.4vpi-sknilcimanydesaberif
moc.sipaelgoog.6vpi-sknilcimanydesaberif
moc.sipaelgoog.ap-scirtemtneilc
moc.sipaxm
moc.sirales
@@ -5384,7 +5386,6 @@ moc.tneitouqc
moc.tnemecnerefer-elliev
moc.tnemeergasuoitibma
moc.tnemeganambjc
moc.tnemerusaem-ppa
moc.tnemerusaemllac
moc.tnemetatsrotaluclac
moc.tnemges
@@ -6074,6 +6075,7 @@ nv.ndcma
nv.revresamitda
nv.statsog
oc.0202107732g
oc.0308155653g
oc.0639439613g
oc.0960813772g
oc.1-d
@@ -6659,6 +6661,7 @@ swen.emtrela
ta.aweo
ta.kcrt
ta.m4da
ta.nim5.sda
ta.ogz.cg
ta.recnalab-da
ta.rwar
@@ -7354,7 +7357,6 @@ ts.jda
ts.urtigid
ts.urtigid.lexip
tseb.gninim
tsoh.2nrevresda
tsoh.edoc-ndc
tsoh.sfscitats
tsoh.sug
@@ -7380,6 +7382,7 @@ ue.41x.stats
ue.aade
ue.aeraaidem
ue.aidemci.rennab
ue.aidemgk.xnepo
ue.alecag
ue.cafsda
ue.cilliga
@@ -7439,6 +7442,7 @@ ur.1kvtn
ur.2puxul
ur.42tats
ur.452aketpa.sda
ur.95bb.sda
ur.9pnw3
ur.aifam.001pot
ur.ariv
@@ -7689,6 +7693,7 @@ zc.retnuocbew
zc.rotkelfer.amalker
zc.selasrebyc.sda
zc.srv
zc.teniks.xnepo
zc.tenorim.amalker
zc.tenx.2sda
zc.tenxin.rennab
@@ -7710,6 +7715,7 @@ zc.xobonik.sda
zc.xyn.sda
zc.yaw.da
zc.ygider.kinab
zc.yratyk.2xnepo
zc.yrdom.sgn.saila.vomodyksec
zc.yrennab
zc.zw.da

View File

@@ -1193,6 +1193,7 @@ g2773180690.co
g2971265025.co
g3169349360.co
g3367433695.co
g3565518030.co
gaconnector.com
gameanalytics.com
gammachug.com

View File

@@ -377,6 +377,7 @@ ads-twitter.com
ads.365.mk
ads.4tube.com
ads.5ci.lt
ads.5min.at
ads.73dpi.com
ads.aavv.com
ads.abovetopsecret.com
@@ -393,9 +394,11 @@ ads.adultfriendfinder.com
ads.advance.net
ads.adverline.com
ads.affiliates.match.com
ads.alive.com
ads.alt.com
ads.amdmb.com
ads.amigos.com
ads.annabac.com
ads.aol.co.uk
ads.apn.co.nz
ads.appsgeyser.com
@@ -406,6 +409,7 @@ ads.asiafriendfinder.com
ads.ask.com
ads.aspalliance.com
ads.avazu.net
ads.bb59.ru
ads.belointeractive.com
ads.betfair.com
ads.bigchurch.com
@@ -433,6 +437,7 @@ ads.cc-dt.com
ads.centraliprom.com
ads.channel4.com
ads.cheabit.com
ads.citymagazine.si
ads.clasificadox.com
ads.clearchannel.com
ads.co.com
@@ -839,6 +844,7 @@ adserver.sanomawsoy.fi
adserver.sciflicks.com
adserver.sharewareonline.com
adserver.spankaway.com
adserver.thema.cc
adserver.theonering.net
adserver.twitpic.com
adserver.viagogo.com
@@ -849,7 +855,6 @@ adserver1-images.backbeatmedia.com
adserver1.backbeatmedia.com
adserver1.mindshare.de
adserver2.mindshare.de
adservern2.host
adserverplus.com
adserverpub.com
adserversolutions.com
@@ -944,7 +949,6 @@ advertise.com
advertiseireland.com
advertisementafterthought.com
advertiserurl.com
advertisespace.com
advertising.com
advertisingbanners.com
advertisingbox.com
@@ -2491,8 +2495,12 @@ openadsnetwork.com
opentag-stats.qubit.com
openx.actvtech.com
openx.angelsgroup.org.uk
openx.aosis.co.za
openx.cairo360.com
openx.kgmedia.eu
openx.skinet.cz
openx.smcaen.fr
openx2.kytary.cz
operationkettle.com
opienetwork.com
opmnstr.com
@@ -2827,7 +2835,6 @@ rotabanner.com
roughroll.com
roxr.net
rs6.net
rsalcdp.com
rta.dailymail.co.uk
rtb.gumgum.com
rtb.openx.net

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import os
import sys
import common_lib as mylib
import bundle_combine
import bundle_download
@@ -10,6 +11,18 @@ import html_bundle
import tracker_download
def print_usage_and_exit():
    """Print the command overview and terminate the process with status 0.

    `mylib.usage` is called first with this script's path and the argument
    synopsis (presumably it prints the usage line -- confirm in
    `common_lib`), followed by the table of available sub-commands.
    """
    mylib.usage(__file__, 'command [params]')
    print('''
import | check '_in' folder for new apps
tracker | update tracking domains
icons | check & download missing icons
run [bundle_id] [...] | recombine and rebuild apps
del [bundle_id] [...] | remove app and rebuild index
''')
    # sys.exit rather than the bare `exit` helper: the latter is injected by
    # the `site` module for interactive use and is not available when the
    # interpreter runs without it (e.g. `python -S`).
    sys.exit(0)
def del_id(bundle_ids):
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_appids())
@@ -57,15 +70,25 @@ def tracker_update():
combine_and_update(['*'], where=new_trackers)
def process():
# del_id(['_manually'])
if __name__ == '__main__':
args = sys.argv[1:]
if len(args) == 0:
print_usage_and_exit()
cmd = args[0]
params = args[1:]
if cmd == 'import':
import_update()
# tracker_update()
elif cmd == 'del':
if len(params) == 0:
print_usage_and_exit()
del_id(params) # ['_manually']
elif cmd == 'run':
if len(params) == 0:
print_usage_and_exit()
combine_and_update(params) # ['*'], where=['test.com']
elif cmd == 'icons':
if bundle_download.download_missing_icons(force=False):
html_index.process()
elif cmd == 'tracker':
tracker_update()
# tracker_download.combine_all('x')
# combine_and_update(['*']) # where=['test.com']
# if bundle_download.download_missing_icons(force=False):
# html_index.process()
# html_index.process()
process()

View File

@@ -16,8 +16,8 @@ known_trackers = None
def save_list(result_set, fname, binary=False):
if not result_set:
return False
out = mylib.path_root('src', 'lists', fname)
return []
out = mylib.path_root('src', 'lists', 'tracker_' + fname)
with open(out + '_tmp', 'wb' if binary else 'w') as fp:
end = b'\n' if binary else '\n'
for domain in sorted(result_set):
@@ -37,11 +37,14 @@ def save_list(result_set, fname, binary=False):
def enum_lines(url, ignore=None):
    """Yield the relevant lines of the payload downloaded from `url`.

    The result of `mylib.download(url)` is split on newline bytes. Empty
    lines are dropped and, when `ignore` is truthy, so are lines that start
    with that prefix. Any exception raised while downloading or iterating
    is reported through `mylib.err` and simply ends the generator instead
    of propagating to the consumer.
    """
    try:
        for raw in mylib.download(url).split(b'\n'):
            if not raw:
                continue  # blank line
            if ignore and raw.startswith(ignore):
                continue  # caller asked to skip lines with this prefix
            yield raw
    except Exception as err:
        mylib.err('tracker-download', str(err) + ' in ' + url)
def github(path):
@@ -53,6 +56,13 @@ def lockdown(fname, urlname):
return save_list(set(enum_lines(url)), fname, binary=True)
def customlist(fname):
    """Download this project's own tracker list and store it under `fname`.

    The lines are fetched with `enum_lines`, de-duplicated into a set and
    handed to `save_list` in binary mode; the return value is whatever
    `save_list` returns.
    """
    # The 'list.txt' file lives on this very server and could be read
    # directly. Going through the HTTP endpoint instead keeps the api
    # separable from the website.
    domains = set(enum_lines('https://appchk.de/api/v1/trackers/'))
    return save_list(domains, fname, binary=True)
def easylist(fname, urlname):
url = github('easylist/easylist/master/easyprivacy/') + urlname
res = set()
@@ -130,15 +140,15 @@ def combine_all(changes):
def process():
print('downloading tracker domains ...')
changes = []
changes += lowe('tracker_lowe.txt')
changes += easylist('tracker_easylist.txt',
'easyprivacy_trackingservers.txt')
changes += easylist('tracker_easylist_int.txt',
changes += customlist('custom.txt')
changes += lowe('lowe.txt')
changes += easylist('easylist.txt', 'easyprivacy_trackingservers.txt')
changes += easylist('easylist_int.txt',
'easyprivacy_trackingservers_international.txt')
changes += exodus('tracker_exodus.txt')
# changes += lockdown('tracker_lockdown_clickbait.txt', 'clickbait.txt')
# changes += lockdown('tracker_lockdown_marketing.txt', 'marketing.txt')
# changes += lockdown('tracker_lockdown_game_ads.txt', 'game_ads.txt')
changes += exodus('exodus.txt')
# changes += lockdown('lockdown_clickbait.txt', 'clickbait.txt')
# changes += lockdown('lockdown_marketing.txt', 'marketing.txt')
# changes += lockdown('lockdown_game_ads.txt', 'game_ads.txt')
combine_all(changes)
print('')
return changes