Initial

src/3rd-domains.txt (new file)

at.ac
at.co
at.gv
at.or
at.priv
au.asn
au.com
au.csiro
au.edu
au.gov
au.id
au.net
au.org
bm.com
bm.edu
bm.gov
bm.net
bm.org
br.adm
br.adv
br.agr
br.am
br.arq
br.art
br.ato
br.b
br.bio
br.blog
br.bmd
br.cim
br.cng
br.cnt
br.com
br.coop
br.ecn
br.edu
br.eng
br.esp
br.etc
br.eti
br.far
br.flog
br.fm
br.fnd
br.fot
br.fst
br.g12
br.ggf
br.gov
br.imb
br.ind
br.inf
br.jor
br.jus
br.leg
br.lel
br.mat
br.med
br.mil
br.mus
br.net
br.nom
br.not
br.ntr
br.odo
br.ong
br.org
br.ppg
br.pro
br.psc
br.psi
br.qsl
br.radio
br.rec
br.slg
br.srv
br.taxi
br.teo
br.tmp
br.trd
br.tur
br.tv
br.vet
br.vlog
br.wiki
br.zlg
es.com
es.edu
es.gob
es.nom
es.org
fk.ac
fk.co
fk.gov
fk.net
fk.nom
fk.org
fr.aeroport
fr.avocat
fr.avoues
fr.cci
fr.chambagri
fr.chirurgiens-dentistes
fr.experts-comptables
fr.geometre-expert
fr.greta
fr.huissier-justice
fr.medecin
fr.notaires
fr.pharmacien
fr.port
fr.prd
fr.veterinaire
gi.com
gi.edu
gi.gov
gi.ltd
gi.mod
gi.org
hu.2000
hu.agrar
hu.bolt
hu.casino
hu.city
hu.co
hu.edu
hu.erotica
hu.erotika
hu.film
hu.forum
hu.games
hu.gov
hu.hotel
hu.info
hu.ingatlan
hu.jogasz
hu.konyvelo
hu.lakas
hu.media
hu.mobi
hu.net
hu.news
hu.org
hu.priv
hu.reklam
hu.sex
hu.shop
hu.sport
hu.suli
hu.szex
hu.tm
hu.tozsde
hu.utazas
hu.video
il.ac
il.co
il.gov
il.idf
il.k12
il.muni
il.net
il.org
im.ac
im.co
im.com
im.gov
im.net
im.org
im.ro
in.ac
in.co
in.edu
in.ernet
in.firm
in.gen
in.gov
in.ind
in.mil
in.net
in.org
in.res
je.co
je.net
je.org
kr.ac
kr.busan
kr.chungbuk
kr.chungnam
kr.co
kr.daegu
kr.daejeon
kr.es
kr.gangwon
kr.go
kr.gwangju
kr.gyeongbuk
kr.gyeonggi
kr.gyeongnam
kr.hs
kr.incheon
kr.jeju
kr.jeonbuk
kr.jeonnam
kr.kg
kr.mil
kr.ms
kr.ne
kr.or
kr.pe
kr.re
kr.sc
kr.seoul
kr.ulsan
ky.com
ky.edu
ky.gov
ky.net
ky.org
lk.ac
lk.assn
lk.com
lk.edu
lk.gov
lk.grp
lk.hotel
lk.int
lk.ltd
lk.net
lk.ngo
lk.org
lk.sch
lk.soc
lk.web
ms.com
ms.edu
ms.gov
ms.net
ms.org
nz.ac
nz.co
nz.cri
nz.geek
nz.gen
nz.govt
nz.health
nz.iwi
nz.kiwi
nz.maori
nz.mil
nz.net
nz.org
nz.parliament
nz.school
pn.ac
pn.co
pn.in
pn.net
pn.org
re.asso
re.com
re.nom
ru.mari
ru.mari-el
ru.ac
ru.adygeya
ru.altai
ru.amur
ru.amursk
ru.arkhangelsk
ru.astrakhan
ru.baikal
ru.bashkiria
ru.belgorod
ru.bir
ru.bryansk
ru.buryatia
ru.cap
ru.cbg
ru.chel
ru.chelyabinsk
ru.chita
ru.chukotka
ru.cmw
ru.com
ru.dagestan
ru.e-burg
ru.edu
ru.fareast
ru.gov
ru.grozny
ru.int
ru.irkutsk
ru.ivanovo
ru.izhevsk
ru.jamal
ru.jar
ru.joshkar-ola
ru.k-uralsk
ru.kalmykia
ru.kaluga
ru.kamchatka
ru.karelia
ru.kazan
ru.kchr
ru.kemerovo
ru.khabarovsk
ru.khakassia
ru.khv
ru.kirov
ru.kms
ru.koenig
ru.komi
ru.kostroma
ru.krasnoyarsk
ru.kuban
ru.kurgan
ru.kursk
ru.kustanai
ru.kuzbass
ru.lipetsk
ru.magadan
ru.magnitka
ru.marine
ru.mil
ru.mordovia
ru.mos
ru.mosreg
ru.msk
ru.murmansk
ru.mytis
ru.nakhodka
ru.nalchik
ru.net
ru.nkz
ru.nnov
ru.norilsk
ru.nov
ru.novosibirsk
ru.nsk
ru.omsk
ru.orenburg
ru.org
ru.oryol
ru.oskol
ru.penza
ru.perm
ru.pp
ru.pskov
ru.ptz
ru.pyatigorsk
ru.rnd
ru.rubtsovsk
ru.ryazan
ru.sakhalin
ru.samara
ru.saratov
ru.simbirsk
ru.smolensk
ru.snz
ru.spb
ru.stavropol
ru.stv
ru.surgut
ru.syzran
ru.tambov
ru.tatarstan
ru.tlt
ru.tom
ru.tomsk
ru.tsaritsyn
ru.tsk
ru.tula
ru.tuva
ru.tver
ru.tyumen
ru.udm
ru.udmurtia
ru.ulan-ude
ru.vdonsk
ru.vladikavkaz
ru.vladimir
ru.vladivostok
ru.volgograd
ru.vologda
ru.voronezh
ru.vrn
ru.vyatka
ru.yakutia
ru.yamal
ru.yaroslavl
ru.yekaterinburg
ru.yuzhno-sakhalinsk
sg.com
sg.edu
sg.gov
sg.net
sg.org
sg.per
sh.co
sh.com
sh.edu
sh.gov
sh.net
sh.nom
sh.org
tc.com
tc.net
tc.org
tc.pro
th.ac
th.co
th.go
th.in
th.mi
th.net
th.or
tr.av
tr.bbs
tr.bel
tr.biz
tr.com
tr.dr
tr.edu
tr.gen
tr.gov
tr.info
tr.k12
tr.mil
tr.name
tr.net
tr.org
tr.pol
tr.tel
tr.tv
tr.web
tt.aero
tt.biz
tt.charity
tt.co
tt.com
tt.coop
tt.edu
tt.gov
tt.info
tt.int
tt.jobs
tt.mil
tt.mobi
tt.museum
tt.name
tt.net
tt.org
tt.pro
tt.tel
tt.travel
ua.cherkassy
ua.cherkasy
ua.chernigov
ua.chernivtsi
ua.chernovtsy
ua.ck
ua.cn
ua.com
ua.cr
ua.crimea
ua.cv
ua.dn
ua.dnepropetrovsk
ua.dnipropetrovsk
ua.donetsk
ua.dp
ua.edu
ua.gov
ua.if
ua.in
ua.ivano-frankivsk
ua.kh
ua.kharkiv
ua.kharkov
ua.kherson
ua.khmelnitskiy
ua.kiev
ua.kirovograd
ua.km
ua.kr
ua.ks
ua.kv
ua.kyiv
ua.lg
ua.lugansk
ua.lutsk
ua.lv
ua.lviv
ua.mk
ua.net
ua.nikolaev
ua.od
ua.odesa
ua.odessa
ua.org
ua.pl
ua.poltava
ua.pp
ua.rivne
ua.rovno
ua.rv
ua.sevastopol
ua.sm
ua.sumy
ua.te
ua.ternopil
ua.uz
ua.uzhgorod
ua.vinnica
ua.vl
ua.vn
ua.volyn
ua.yalta
ua.zaporizhzhe
ua.zhitomir
ua.zp
ua.zt
uk.ac
uk.co
uk.gov
uk.ltd
uk.me
uk.mil
uk.mod
uk.net
uk.nhs
uk.nic
uk.org
uk.parliament
uk.plc
uk.police
uk.sch
us.fed
us.isa
us.nsn
za.ac
za.agric
za.alt
za.co
za.edu
za.gov
za.grondar
za.law
za.mil
za.net
za.ngo
za.nis
za.nom
za.org
za.school
za.tm
za.web

src/README.md (new file)

- `main.py`
  Simply call this script from a cron job (or similar) and it will take care of everything else.
  It runs the scripts below in the proper order.

- `3rd-domains.txt`
  Contains a list of common 3rd-level domains, such as `co.uk`.

- `common_lib.py`
  Library with useful functions shared across the Python scripts.

- `bundle_import.py`
  Copies all `*.json` files from `data/_in` to their bundle-id destination folder, e.g.
  `mv data/_in/test.json data/com/apple/notes/id_42.json`.
  (A sketch of the expected input format follows after this list.)

- `bundle_combine.py`
  Merges all `id_*.json` files of a bundle id into a single `combined.json`.
  (Run this script with one or more bundle ids as parameters.)

- `bundle_download.py`
  Downloads and caches app metadata (in `de` and `en`) from Apple's servers for a given bundle id.
  Also downloads the app icon and stores it in the bundle id's out folder.
  (Run this script with one or more bundle ids as parameters.)

- `html_bundle.py`
  Takes the `combined.json` file and generates the graphs and the HTML page.
  (Run this script with one or more bundle ids as parameters.)

- `html_index.py`
  Creates all pages of the app index and links to the bundle-id subpages.

- `html_root.py`
  Creates the main `index.html`.
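The import and combine steps only rely on a handful of keys in each incoming recording. A minimal sketch of such a file, with hypothetical values inferred from how `bundle_import.py` and `bundle_combine.py` read it (the entries inside `logs` are placeholders; only their count is used):

```python
# Hypothetical example of one incoming recording, e.g. data/_in/test.json.
# Only the keys actually read by the scripts are shown.
record = {
    'app-bundle': 'com.apple.notes',  # routes the file to data/com/apple/notes/
    'app-name': 'Notes',              # copied into combined.json as 'name'
    'duration': 180,                  # recording length in seconds (summed into 'rec-total')
    'logs': {
        # subdomain -> list of observed requests; only len() of each list is counted
        'api.example.com': ['req', 'req'],
        'cdn.example.com': ['req'],
    },
}
```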

src/bundle_combine.py (new executable file)

#!/usr/bin/env python3

import sys
import common_lib as mylib


with open(mylib.path_root('src', '3rd-domains.txt'), 'r') as fp:
    level3_doms = set([x.strip() for x in fp.readlines()])


def dom_in_3rd_domain(needle):
    # TODO: binary tree lookup
    return needle in level3_doms


def get_parent_domain(subdomain):
    parts = subdomain.split('.')
    if len(parts) < 3:
        return subdomain
    # 3rd-domains.txt stores zones reversed (TLD first), e.g. 'uk.co' for 'co.uk'
    elif dom_in_3rd_domain('.'.join(reversed(parts[-2:]))):
        return '.'.join(parts[-3:])
    else:
        return '.'.join(parts[-2:])


def dict_increment(ddic, key, num):
    # add num to ddic[key], creating the key on first use
    try:
        ddic[key]
    except KeyError:
        ddic[key] = 0
    ddic[key] += num


def json_combine(bundle_id):
    res = dict()
    domA = dict()  # unique sub domains
    domB = dict()  # total sub domains
    domC = dict()  # unique parent domains
    domD = dict()  # total parent domains
    for fname, jdata in mylib.enum_jsons(bundle_id):
        res['name'] = jdata['app-name']
        dict_increment(res, '#rec', 1)
        dict_increment(res, 'rec-total', jdata['duration'])
        try:
            logs = jdata['logs']
            uniq_par = set()
            for subdomain in logs:
                occurs = len(logs[subdomain])
                dict_increment(res, '#logs', occurs)
                dict_increment(domA, subdomain, 1)
                dict_increment(domB, subdomain, occurs)
                par_dom = get_parent_domain(subdomain)
                uniq_par.add(par_dom)
                dict_increment(domD, par_dom, occurs)
            for par in uniq_par:
                dict_increment(domC, par, 1)
        except KeyError:
            mylib.err('bundle-combine', 'skip: ' + fname)
    res['uniq_subdom'] = domA
    res['uniq_pardom'] = domC
    res['total_subdom'] = domB
    res['total_pardom'] = domD
    return res


def process(bundle_ids):
    print('writing combined json ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_appids())
    for bid in bundle_ids:
        print(' ' + bid)
        mylib.json_write_combined(bid, json_combine(bid))
    print('')


if __name__ == '__main__':
    args = sys.argv[1:]
    if len(args) > 0:
        process(args)
    else:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
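Illustrative only, assuming the reversed-order lookup above and hypothetical hostnames; `get_parent_domain` collapses a logged subdomain to its registrable parent:

```python
from bundle_combine import get_parent_domain

get_parent_domain('tracker.ads.example.com')  # -> 'example.com' ('com.example' is not listed)
get_parent_domain('static.bbc.co.uk')         # -> 'bbc.co.uk' ('uk.co' is in 3rd-domains.txt)
get_parent_domain('example.org')              # -> 'example.org' (fewer than three labels)
```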

src/bundle_download.py (new executable file)

#!/usr/bin/env python3

import sys
import common_lib as mylib


def download_info(bundle_id, lang, force=False):
    if force or not mylib.meta_json_exists(bundle_id, lang):
        url = 'https://itunes.apple.com/lookup?bundleId={}&country={}'.format(
            bundle_id, lang.upper())
        json = mylib.download(url, isJSON=True)
        json = json['results'][0]
        # delete unused keys to save on storage
        for key in ['supportedDevices', 'releaseNotes', 'description',
                    'screenshotUrls']:
            try:
                del(json[key])
            except KeyError:
                continue
        mylib.json_write_meta(bundle_id, json, lang)


def download_icon(bundle_id, force=False, langs=['us', 'de']):
    # icon_file = mylib.path_data_app(bundle_id, 'icon.png')
    icon_file = mylib.path_out_app(bundle_id, 'icon.png')
    if force or not mylib.file_exists(icon_file):
        json = None
        for lang in langs:
            if not json:
                json = mylib.json_read_meta(bundle_id, lang)
        mylib.download_file(json['artworkUrl100'], icon_file)


def download(bundle_id, force=False):
    if not mylib.valid_bundle_id(bundle_id):
        mylib.err('apple-download', 'invalid id: ' + bundle_id)
        return

    mylib.printf(' {} => '.format(bundle_id))
    for lang in ['us', 'de']:
        try:
            mylib.printf(lang)
            download_info(bundle_id, lang, force=force)
            mylib.printf('[✔] ')
        except Exception:
            mylib.printf('[✘] ')
            mylib.err('apple-download', 'json {}: {}'.format(
                lang.upper(), bundle_id), logOnly=True)
    try:
        mylib.printf('icon')
        download_icon(bundle_id, force=force)
        mylib.printf('[✔] ')
    except Exception:
        mylib.printf('[✘] ')
        mylib.err('apple-download', 'img for ' + bundle_id, logOnly=True)
    print('')  # end printf line


def process(bundle_ids, force=False):
    print('downloading bundle info ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())

    for bid in bundle_ids:
        download(bid, force=force)
    print('')


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) > 0:
        process(args)
    else:
        # process(['*'], force=False)
        mylib.usage(__file__, '[bundle_id] [...]')
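For orientation, a hedged sketch of what ends up in the metadata cache for a hypothetical bundle id: `download_info()` queries the iTunes lookup endpoint (e.g. `https://itunes.apple.com/lookup?bundleId=com.apple.notes&country=DE`), keeps `results[0]` minus the bulky keys deleted above, and writes it to `data/com/apple/notes/info_de.json`. Later steps read only two of its fields:

```python
# Abbreviated, hypothetical shape of info_de.json after download_info():
info = {
    'trackCensoredName': 'Notes',                                # app name shown by html_index.py
    'artworkUrl100': 'https://example.invalid/AppIcon100.png',   # icon URL fetched by download_icon()
    # ...all remaining App Store fields stay in the cached file
}
```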

src/bundle_import.py (new executable file)

#!/usr/bin/env python3

import os
import common_lib as mylib


def next_path(path_pattern):
    # find the lowest free index: grow exponentially, then binary search the gap
    i = 1
    while os.path.exists(path_pattern % i):
        i = i * 2
    a, b = (i // 2, i)
    while a + 1 < b:
        c = (a + b) // 2  # interval midpoint
        a, b = (c, b) if os.path.exists(path_pattern % c) else (a, c)
    return path_pattern % b


def process():
    print('checking incoming files ...')
    prefix = mylib.path_len(mylib.path_data())
    needs_update = set()
    for fname, jdata in mylib.enum_newly_added():
        try:
            bundle_id = jdata['app-bundle'].strip()
            if mylib.valid_bundle_id(bundle_id):
                dest = mylib.path_data_app(bundle_id)
                needs_update.add(bundle_id)
            else:
                dest = mylib.path_data('_manually')
                # needs_update.add('_manually')

            mylib.mkdir(dest)
            dest_file = next_path(mylib.path_add(dest, 'id_%s.json'))
            mylib.mv(fname, dest_file, printOmitPrefix=prefix)
        except KeyError:
            mylib.err('json-import', 'malformed json: ' + fname)
    print('done.')
    print('')
    return needs_update
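`next_path` finds the lowest free index for a filename pattern by doubling the index until it hits a gap and then binary-searching inside that gap. A short walk-through with a hypothetical folder:

```python
from bundle_import import next_path

# If data/com/apple/notes/ already holds id_1.json and id_2.json, the doubling
# loop probes id_1, id_2, id_4 and stops at the missing id_4; the binary search
# then narrows the interval (2, 4) to the first free index and the call returns
# 'data/com/apple/notes/id_3.json'.
print(next_path('data/com/apple/notes/id_%s.json'))
```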

src/common_lib.py (new executable file)

#!/usr/bin/env python3

import os
import re
import glob
import json
import shutil
import logging
from pathlib import Path
import urllib.request as curl

base_dir = os.path.realpath(os.path.join(
    os.path.dirname(os.path.realpath(__file__)), os.pardir))


# Paths

def path_add(base, *parts):
    return os.path.join(base, *parts)


def path_root(*parts):
    return os.path.join(base_dir, *parts)


def path_data(*path_components):
    return path_root('data', *path_components)


def path_data_app(bundle_id, filename=None):
    pth = path_root('data', *bundle_id.split('.'))
    return path_add(pth, filename) if filename else pth


def path_out(*path_components):
    return path_root('out', *path_components)


def path_out_app(bundle_id, filename=None):
    pth = path_root('out', 'app', bundle_id)
    return path_add(pth, filename) if filename else pth


def path_len(path, isDir=True):
    # prefix length incl. trailing separator, used to print paths relative to a base dir
    return len(path) + (len(os.sep) if isDir else 0)


# Templates

def template(html_file):
    return path_root('templates', html_file)


def template_with_base(content, title=None):
    with open(template('base.html'), 'r') as fp:
        return fp.read().replace(
            '#_TITLE_#', title + ' – ' if title else '').replace(
            '#_CONTENT_#', content)


# Other

regex_bundle_id = re.compile(r'^[A-Za-z0-9\.\-]{1,155}$')
logging.basicConfig(filename=os.path.join(os.pardir, "error.log"),
                    format='%(asctime)s %(message)s',
                    filemode='a')
logger = logging.getLogger()


def usage(_file_, params=''):
    print(' usage: ' + os.path.basename(_file_) + ' ' + params)


def valid_bundle_id(bundle_id):
    return regex_bundle_id.match(bundle_id)


def err(scope, msg, logOnly=False):
    logger.error('[{}] {}'.format(scope, msg))
    if not logOnly:
        print(' [ERROR] ' + msg)


def printf(msg):
    print(msg, end='', flush=True)


# Filesystem

def mkdir(path):
    Path(path).mkdir(parents=True, exist_ok=True)


def mv(path, to, printOmitPrefix=None):
    if printOmitPrefix:
        print(' mv ' + path[printOmitPrefix:] + ' -> ' + to[printOmitPrefix:])
    Path(path).rename(to)


def rm(path):
    shutil.rmtree(path)


def dir_exists(path):
    return os.path.isdir(path)


def file_exists(path):
    # empty files count as missing
    return os.path.isfile(path) and os.path.getsize(path) > 0


def meta_json_exists(bundle_id, lang):
    return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang)))


# Download

def download(url, isJSON=False):
    req = curl.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with curl.urlopen(req) as response:
        data = response.read()
        return json.loads(data.decode()) if isJSON else data


def download_file(url, path):
    curl.urlretrieve(url, path)


# Enumerator

def enum_newly_added():
    for fname in glob.glob(path_data('_in', '*.json')):
        with open(fname, 'r') as fp:
            yield fname, json.load(fp)


def enum_appids():
    for x in glob.glob(path_out_app('*')):
        yield os.path.basename(x)


def enum_jsons(bundle_id):
    for fname in glob.glob(path_data_app(bundle_id, 'id_*.json')):
        with open(fname, 'r') as fp:
            yield fname, json.load(fp)


def enum_data_appids():
    data_root = path_data()
    prfx = path_len(data_root)
    for path, dirs, files in os.walk(data_root):
        if 'combined.json' in files:
            yield path[prfx:].replace(os.sep, '.')


# JSON read

def json_read(path):
    with open(path, 'r') as fp:
        return json.load(fp)


def json_read_combined(bundle_id):
    return json_read(path_data_app(bundle_id, 'combined.json'))


def json_read_meta(bundle_id, lang):
    return json_read(path_data_app(bundle_id, 'info_{}.json'.format(lang)))


# JSON write

def json_write(path, obj, pretty=False):
    with open(path, 'w') as fp:
        json.dump(obj, fp, indent=2 if pretty else None, sort_keys=pretty)


def json_write_combined(bundle_id, obj):
    fname = path_data_app(bundle_id, 'combined.json')
    json_write(fname, obj, pretty=True)


def json_write_meta(bundle_id, obj, lang):
    fname = path_data_app(bundle_id, 'info_{}.json'.format(lang))
    json_write(fname, obj, pretty=True)
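The path helpers encode the repository layout: `path_data_app` splits the bundle id into nested folders under `data/`, while `path_out_app` keeps the dotted id as a single folder under `out/app/`. A quick sketch with a hypothetical bundle id, where `<repo>` stands for `base_dir`:

```python
import common_lib as mylib

mylib.path_data_app('com.apple.notes')              # -> <repo>/data/com/apple/notes
mylib.path_data_app('com.apple.notes', 'a.json')    # -> <repo>/data/com/apple/notes/a.json
mylib.path_out_app('com.apple.notes', 'icon.png')   # -> <repo>/out/app/com.apple.notes/icon.png
```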

src/html_bundle.py (new executable file)

#!/usr/bin/env python3

import sys
import common_lib as mylib
import matplotlib
import matplotlib.pyplot as plt

matplotlib.use('Agg')  # disable interactive mode


def sort_dict(count_dict):
    sorted_count = sorted(count_dict.items(), key=lambda x: (-x[1], x[0]))
    names = ['{} ({})'.format(*x) for x in sorted_count]
    sizes = [x[1] for x in sorted_count]
    return names, sizes


def gen_graph(count_dict, outfile):
    names, sizes = sort_dict(count_dict)
    pie1, _ = plt.pie(sizes, labels=names)
    plt.setp(pie1, width=0.5, edgecolor='white')
    plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3)
    plt.savefig(outfile, bbox_inches='tight', pad_inches=0)  # transparent=True
    plt.close()


def seconds_to_time(seconds):
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)


def gen_domain_tags(unsorted_dict):
    for x in sorted(unsorted_dict):
        yield '<i>{}</i>'.format(x)


def gen_html(bundle_id, obj):
    return mylib.template_with_base(f'''
<h2>{obj['name']}</h2>
<div id="meta">
  <table>
    <tr><td>Bundle-id:</td><td>{
        bundle_id
    }</td></tr>
    <tr><td>Number of recordings:</td><td>{
        obj['#rec']
    }</td></tr>
    <tr><td>Total number of logs:</td><td>{
        obj['#logs']
    }</td></tr>
    <tr><td>Cumulative recording time:</td><td>{
        seconds_to_time(obj['rec-total'])
    }</td></tr>
    <tr><td>Average recording time:</td><td>{
        round(obj['rec-total'] / obj['#rec'], 1)
    } s</td></tr>
  </table>
</div>
<h3>Connections</h3>
<div id="connections">
  <table>
    <tr><td>Domains:</td><td>{
        ''.join(gen_domain_tags(obj['uniq_pardom']))
    }</td></tr>
    <tr><td>Subdomains:</td><td>{
        ''.join(gen_domain_tags(obj['uniq_subdom']))
    }</td></tr>
    <tr><td>Known Trackers:</td><td>{
        '...'
    }</td></tr>
  </table>
  <figure><img src="par.svg"></figure>
  <figure><img src="sub.svg"></figure>
</div>''', title=obj['name'])


def make_bundle_out(bundle_id):
    jdata = mylib.json_read_combined(bundle_id)
    out_dir = mylib.path_out_app(bundle_id)
    needs_update_index = False
    if not mylib.dir_exists(out_dir):
        needs_update_index = True
        mylib.mkdir(out_dir)
    try:
        gen_graph(jdata['total_subdom'], mylib.path_add(out_dir, 'sub.svg'))
        gen_graph(jdata['total_pardom'], mylib.path_add(out_dir, 'par.svg'))
    except KeyError:
        mylib.err('bundle-generate-page', 'skip: ' + bundle_id)

    with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp:
        fp.write(gen_html(bundle_id, jdata))
    return needs_update_index


def process(bundle_ids):
    print('generating html pages ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_appids())

    ids_new_in_index = set()
    for bid in bundle_ids:
        print(' ' + bid)
        if make_bundle_out(bid):
            ids_new_in_index.add(bid)
    print('')
    return ids_new_in_index


if __name__ == '__main__':
    args = sys.argv[1:]
    if len(args) > 0:
        process(args)
    else:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
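`sort_dict` orders the counted domains by descending count (name as tie-breaker) and builds the pie-chart labels. With hypothetical counts:

```python
from html_bundle import sort_dict

names, sizes = sort_dict({'cdn.example.com': 2, 'api.example.com': 5})
# names == ['api.example.com (5)', 'cdn.example.com (2)']
# sizes == [5, 2]
```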

src/html_index.py (new executable file)

#!/usr/bin/env python3

import common_lib as mylib


def gen_obj(bundle_id):
    if mylib.file_exists(mylib.path_out_app(bundle_id, 'icon.png')):
        icon = '/app/{0}/icon.png'.format(bundle_id)
    else:
        icon = '/static/app-template.svg'
    try:
        name = mylib.json_read_meta(bundle_id, 'de')['trackCensoredName']
    except Exception:
        name = '< App-Name >'
    return {'id': bundle_id, 'name': name, 'img': icon}


def gen_entry(obj):
    return '''
<a href="/app/{id}/index.html">
  <div>
    <img src="{img}" width="100" height="100">
    <span class="name">{name}</span><br />
    <span class="detail">{id}</span>
  </div>
</a>'''.format(**obj)


def gen_pager(current, total):

    def mklink(i, name, active=False):
        clss = ' class="active"' if active else ''
        return '<a href="./{}"{}>{}</a>'.format(i, clss, name)

    links = ''
    # if current > 1:
    #     links += mklink(current - 1, 'Previous')
    start = max(1, current - 5)
    for i in range(start, min(total, start + 10) + 1):
        links += mklink(i, i, active=i == current)
    # if current < total:
    #     links += mklink(current + 1, 'Next')
    return '<div id="pagination">{}</div>'.format(links)


def gen_page(arr, base, page_id=1, total=1):
    path = mylib.path_add(base, str(page_id))
    mylib.mkdir(path)
    with open(mylib.path_add(path, 'index.html'), 'w') as fp:
        content = ''.join([gen_entry(x) for x in arr])
        pagination = gen_pager(page_id, total)  # if total > 1 else ''
        fp.write(mylib.template_with_base('''
<h2>List of app recordings (A–Z)</h2>
<div id="app-toc">
{}
</div>
{}'''.format(content, pagination), title="Index"))


def process(per_page=60):
    print('generating app-index ...')
    index_dir = mylib.path_out('index', 'page')
    mylib.rm(index_dir)
    mylib.mkdir(index_dir)

    apps = [gen_obj(x) for x in mylib.enum_appids()]
    apps_total = len(apps)
    pages_total, rest = divmod(apps_total, per_page)
    if rest > 0:
        pages_total += 1
    print(' {} apps'.format(apps_total))
    print(' {} pages'.format(pages_total))

    apps_sorted = sorted(apps, key=lambda x: (x['name'], x['id']))
    for x in range(1, pages_total + 1):
        start = (x - 1) * per_page
        batch = apps_sorted[start:start + per_page]
        gen_page(batch, index_dir, x, pages_total)
    print('')


if __name__ == '__main__':
    process()
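`gen_pager` renders a window of at most eleven page links around the current page and marks the current one as active, for example:

```python
from html_index import gen_pager

gen_pager(2, 3)
# -> '<div id="pagination"><a href="./1">1</a>'
#    '<a href="./2" class="active">2</a><a href="./3">3</a></div>'
```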

src/html_root.py (new executable file)

#!/usr/bin/env python3

import common_lib as mylib


def gen_root():
    with open(mylib.path_out('index.html'), 'w') as fp:
        fp.write(mylib.template_with_base('''
<h2>About</h2>
<p class="squeeze">
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<a id="get-appcheck" href="https://testflight.apple.com/join/9jjaFeHO">
  <img src="/static/appcheck.svg" alt="app-icon" width="30" height="30">
  <p>
    Get the app and contribute.<br />
    Join the TestFlight Beta.
  </p>
</a>{}'''.format('')))


def process():
    print('generating root html ...')
    gen_root()  # root index.html


if __name__ == '__main__':
    process()

src/main.py (new executable file)

#!/usr/bin/env python3

import common_lib as mylib
import bundle_import
import bundle_combine
import bundle_download
import html_root
import html_index
import html_bundle


def reset_new():
    print("RESET json files ...")
    prefix = mylib.path_len(mylib.path_data())
    for bid in mylib.enum_appids():
        for src, _ in mylib.enum_jsons(bid):
            frmt = mylib.path_add(mylib.path_data('_in', '%s.json'))
            dest = bundle_import.next_path(frmt)
            mylib.mv(src, dest, printOmitPrefix=prefix)
    print('')


def del_id(bundle_ids):
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_appids())

    for bid in bundle_ids:
        dest = mylib.path_out_app(bid)
        if mylib.dir_exists(dest):
            mylib.rm(dest)
    html_index.process()


def full_chain(force=False):
    bundle_ids = bundle_import.process()
    if force:
        bundle_ids = list(mylib.enum_data_appids())
    if len(bundle_ids) > 0:
        bundle_combine.process(bundle_ids)
        new_ids = html_bundle.process(bundle_ids)
        if len(new_ids) > 0:
            bundle_download.process(new_ids)
            html_index.process()
            html_root.process()
    else:
        print('none to import, not rebuilding index')


def process():
    # reset_new()
    # del_id(['*'])
    full_chain(force=False)


process()