Initial

src/3rd-domains.txt (new file)

at.ac
at.co
at.gv
at.or
at.priv
au.asn
au.com
au.csiro
au.edu
au.gov
au.id
au.net
au.org
bm.com
bm.edu
bm.gov
bm.net
bm.org
br.adm
br.adv
br.agr
br.am
br.arq
br.art
br.ato
br.b
br.bio
br.blog
br.bmd
br.cim
br.cng
br.cnt
br.com
br.coop
br.ecn
br.edu
br.eng
br.esp
br.etc
br.eti
br.far
br.flog
br.fm
br.fnd
br.fot
br.fst
br.g12
br.ggf
br.gov
br.imb
br.ind
br.inf
br.jor
br.jus
br.leg
br.lel
br.mat
br.med
br.mil
br.mus
br.net
br.nom
br.not
br.ntr
br.odo
br.ong
br.org
br.ppg
br.pro
br.psc
br.psi
br.qsl
br.radio
br.rec
br.slg
br.srv
br.taxi
br.teo
br.tmp
br.trd
br.tur
br.tv
br.vet
br.vlog
br.wiki
br.zlg
es.com
es.edu
es.gob
es.nom
es.org
fk.ac
fk.co
fk.gov
fk.net
fk.nom
fk.org
fr.aeroport
fr.avocat
fr.avoues
fr.cci
fr.chambagri
fr.chirurgiens-dentistes
fr.experts-comptables
fr.geometre-expert
fr.greta
fr.huissier-justice
fr.medecin
fr.notaires
fr.pharmacien
fr.port
fr.prd
fr.veterinaire
gi.com
gi.edu
gi.gov
gi.ltd
gi.mod
gi.org
hu.2000
hu.agrar
hu.bolt
hu.casino
hu.city
hu.co
hu.edu
hu.erotica
hu.erotika
hu.film
hu.forum
hu.games
hu.gov
hu.hotel
hu.info
hu.ingatlan
hu.jogasz
hu.konyvelo
hu.lakas
hu.media
hu.mobi
hu.net
hu.news
hu.org
hu.priv
hu.reklam
hu.sex
hu.shop
hu.sport
hu.suli
hu.szex
hu.tm
hu.tozsde
hu.utazas
hu.video
il.ac
il.co
il.gov
il.idf
il.k12
il.muni
il.net
il.org
im.ac
im.co
im.com
im.gov
im.net
im.org
im.ro
in.ac
in.co
in.edu
in.ernet
in.firm
in.gen
in.gov
in.ind
in.mil
in.net
in.org
in.res
je.co
je.net
je.org
kr.ac
kr.busan
kr.chungbuk
kr.chungnam
kr.co
kr.daegu
kr.daejeon
kr.es
kr.gangwon
kr.go
kr.gwangju
kr.gyeongbuk
kr.gyeonggi
kr.gyeongnam
kr.hs
kr.incheon
kr.jeju
kr.jeonbuk
kr.jeonnam
kr.kg
kr.mil
kr.ms
kr.ne
kr.or
kr.pe
kr.re
kr.sc
kr.seoul
kr.ulsan
ky.com
ky.edu
ky.gov
ky.net
ky.org
lk.ac
lk.assn
lk.com
lk.edu
lk.gov
lk.grp
lk.hotel
lk.int
lk.ltd
lk.net
lk.ngo
lk.org
lk.sch
lk.soc
lk.web
ms.com
ms.edu
ms.gov
ms.net
ms.org
nz.ac
nz.co
nz.cri
nz.geek
nz.gen
nz.govt
nz.health
nz.iwi
nz.kiwi
nz.maori
nz.mil
nz.net
nz.org
nz.parliament
nz.school
pn.ac
pn.co
pn.in
pn.net
pn.org
re.asso
re.com
re.nom
ru.mari
ru.mari-el
ru.ac
ru.adygeya
ru.altai
ru.amur
ru.amursk
ru.arkhangelsk
ru.astrakhan
ru.baikal
ru.bashkiria
ru.belgorod
ru.bir
ru.bryansk
ru.buryatia
ru.cap
ru.cbg
ru.chel
ru.chelyabinsk
ru.chita
ru.chukotka
ru.cmw
ru.com
ru.dagestan
ru.e-burg
ru.edu
ru.fareast
ru.gov
ru.grozny
ru.int
ru.irkutsk
ru.ivanovo
ru.izhevsk
ru.jamal
ru.jar
ru.joshkar-ola
ru.k-uralsk
ru.kalmykia
ru.kaluga
ru.kamchatka
ru.karelia
ru.kazan
ru.kchr
ru.kemerovo
ru.khabarovsk
ru.khakassia
ru.khv
ru.kirov
ru.kms
ru.koenig
ru.komi
ru.kostroma
ru.krasnoyarsk
ru.kuban
ru.kurgan
ru.kursk
ru.kustanai
ru.kuzbass
ru.lipetsk
ru.magadan
ru.magnitka
ru.marine
ru.mil
ru.mordovia
ru.mos
ru.mosreg
ru.msk
ru.murmansk
ru.mytis
ru.nakhodka
ru.nalchik
ru.net
ru.nkz
ru.nnov
ru.norilsk
ru.nov
ru.novosibirsk
ru.nsk
ru.omsk
ru.orenburg
ru.org
ru.oryol
ru.oskol
ru.penza
ru.perm
ru.pp
ru.pskov
ru.ptz
ru.pyatigorsk
ru.rnd
ru.rubtsovsk
ru.ryazan
ru.sakhalin
ru.samara
ru.saratov
ru.simbirsk
ru.smolensk
ru.snz
ru.spb
ru.stavropol
ru.stv
ru.surgut
ru.syzran
ru.tambov
ru.tatarstan
ru.tlt
ru.tom
ru.tomsk
ru.tsaritsyn
ru.tsk
ru.tula
ru.tuva
ru.tver
ru.tyumen
ru.udm
ru.udmurtia
ru.ulan-ude
ru.vdonsk
ru.vladikavkaz
ru.vladimir
ru.vladivostok
ru.volgograd
ru.vologda
ru.voronezh
ru.vrn
ru.vyatka
ru.yakutia
ru.yamal
ru.yaroslavl
ru.yekaterinburg
ru.yuzhno-sakhalinsk
sg.com
sg.edu
sg.gov
sg.net
sg.org
sg.per
sh.co
sh.com
sh.edu
sh.gov
sh.net
sh.nom
sh.org
tc.com
tc.net
tc.org
tc.pro
th.ac
th.co
th.go
th.in
th.mi
th.net
th.or
tr.av
tr.bbs
tr.bel
tr.biz
tr.com
tr.dr
tr.edu
tr.gen
tr.gov
tr.info
tr.k12
tr.mil
tr.name
tr.net
tr.org
tr.pol
tr.tel
tr.tv
tr.web
tt.aero
tt.biz
tt.charity
tt.co
tt.com
tt.coop
tt.edu
tt.gov
tt.info
tt.int
tt.jobs
tt.mil
tt.mobi
tt.museum
tt.name
tt.net
tt.org
tt.pro
tt.tel
tt.travel
ua.cherkassy
ua.cherkasy
ua.chernigov
ua.chernivtsi
ua.chernovtsy
ua.ck
ua.cn
ua.com
ua.cr
ua.crimea
ua.cv
ua.dn
ua.dnepropetrovsk
ua.dnipropetrovsk
ua.donetsk
ua.dp
ua.edu
ua.gov
ua.if
ua.in
ua.ivano-frankivsk
ua.kh
ua.kharkiv
ua.kharkov
ua.kherson
ua.khmelnitskiy
ua.kiev
ua.kirovograd
ua.km
ua.kr
ua.ks
ua.kv
ua.kyiv
ua.lg
ua.lugansk
ua.lutsk
ua.lv
ua.lviv
ua.mk
ua.net
ua.nikolaev
ua.od
ua.odesa
ua.odessa
ua.org
ua.pl
ua.poltava
ua.pp
ua.rivne
ua.rovno
ua.rv
ua.sevastopol
ua.sm
ua.sumy
ua.te
ua.ternopil
ua.uz
ua.uzhgorod
ua.vinnica
ua.vl
ua.vn
ua.volyn
ua.yalta
ua.zaporizhzhe
ua.zhitomir
ua.zp
ua.zt
uk.ac
uk.co
uk.gov
uk.ltd
uk.me
uk.mil
uk.mod
uk.net
uk.nhs
uk.nic
uk.org
uk.parliament
uk.plc
uk.police
uk.sch
us.fed
us.isa
us.nsn
za.ac
za.agric
za.alt
za.co
za.edu
za.gov
za.grondar
za.law
za.mil
za.net
za.ngo
za.nis
za.nom
za.org
za.school
za.tm
za.web

src/README.md (new file)

- `main.py`
  Simply call this script from a cron job (or similar) and it will take care of everything else.
  It runs the scripts below in the proper order.

- `3rd-domains.txt`
  Contains a list of common 3rd-level domains, such as `co.uk`.

- `common_lib.py`
  Library with useful functions shared across the Python scripts.

- `bundle_import.py`
  Copies all `*.json` files from `data/_in` to their bundle-id destination folder, e.g.
  `mv data/_in/test.json data/com/apple/notes/id_42.json`.
  (A sketch of the expected input format follows after this list.)

- `bundle_combine.py`
  Merges all `id_*.json` files of a bundle id into a single `combined.json`.
  (Run this script with one or more bundle ids as parameters.)

- `bundle_download.py`
  Downloads and caches app metadata (in `de` and `en`) from Apple's servers for a given bundle id.
  Also downloads the app icon and stores it in the bundle id's out folder.
  (Run this script with one or more bundle ids as parameters.)

- `html_bundle.py`
  Takes the `combined.json` file and generates the graphs and the HTML page.
  (Run this script with one or more bundle ids as parameters.)

- `html_index.py`
  Creates all pages of the app index and links to the bundle-id subpages.

- `html_root.py`
  Creates the main `index.html`.
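The import and combine steps only rely on a handful of keys in each incoming recording. A minimal sketch of such a file, with hypothetical values inferred from how `bundle_import.py` and `bundle_combine.py` read it (the entries inside `logs` are placeholders; only their count is used):

```python
# Hypothetical example of one incoming recording, e.g. data/_in/test.json.
# Only the keys actually read by the scripts are shown.
record = {
    'app-bundle': 'com.apple.notes',  # routes the file to data/com/apple/notes/
    'app-name': 'Notes',              # copied into combined.json as 'name'
    'duration': 180,                  # recording length in seconds (summed into 'rec-total')
    'logs': {
        # subdomain -> list of observed requests; only len() of each list is counted
        'api.example.com': ['req', 'req'],
        'cdn.example.com': ['req'],
    },
}
```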

src/bundle_combine.py (new executable file)

#!/usr/bin/env python3

import sys
import common_lib as mylib


with open(mylib.path_root('src', '3rd-domains.txt'), 'r') as fp:
    level3_doms = set([x.strip() for x in fp.readlines()])


def dom_in_3rd_domain(needle):
    # TODO: binary tree lookup
    return needle in level3_doms


def get_parent_domain(subdomain):
    parts = subdomain.split('.')
    if len(parts) < 3:
        return subdomain
    # 3rd-domains.txt stores zones reversed (TLD first), e.g. 'uk.co' for 'co.uk'
    elif dom_in_3rd_domain('.'.join(reversed(parts[-2:]))):
        return '.'.join(parts[-3:])
    else:
        return '.'.join(parts[-2:])


def dict_increment(ddic, key, num):
    # add num to ddic[key], creating the key on first use
    try:
        ddic[key]
    except KeyError:
        ddic[key] = 0
    ddic[key] += num


def json_combine(bundle_id):
    res = dict()
    domA = dict()  # unique sub domains
    domB = dict()  # total sub domains
    domC = dict()  # unique parent domains
    domD = dict()  # total parent domains
    for fname, jdata in mylib.enum_jsons(bundle_id):
        res['name'] = jdata['app-name']
        dict_increment(res, '#rec', 1)
        dict_increment(res, 'rec-total', jdata['duration'])
        try:
            logs = jdata['logs']
            uniq_par = set()
            for subdomain in logs:
                occurs = len(logs[subdomain])
                dict_increment(res, '#logs', occurs)
                dict_increment(domA, subdomain, 1)
                dict_increment(domB, subdomain, occurs)
                par_dom = get_parent_domain(subdomain)
                uniq_par.add(par_dom)
                dict_increment(domD, par_dom, occurs)
            for par in uniq_par:
                dict_increment(domC, par, 1)
        except KeyError:
            mylib.err('bundle-combine', 'skip: ' + fname)
    res['uniq_subdom'] = domA
    res['uniq_pardom'] = domC
    res['total_subdom'] = domB
    res['total_pardom'] = domD
    return res


def process(bundle_ids):
    print('writing combined json ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_appids())
    for bid in bundle_ids:
        print(' ' + bid)
        mylib.json_write_combined(bid, json_combine(bid))
    print('')


if __name__ == '__main__':
    args = sys.argv[1:]
    if len(args) > 0:
        process(args)
    else:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
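Illustrative only, assuming the reversed-order lookup above and hypothetical hostnames; `get_parent_domain` collapses a logged subdomain to its registrable parent:

```python
from bundle_combine import get_parent_domain

get_parent_domain('tracker.ads.example.com')  # -> 'example.com' ('com.example' is not listed)
get_parent_domain('static.bbc.co.uk')         # -> 'bbc.co.uk' ('uk.co' is in 3rd-domains.txt)
get_parent_domain('example.org')              # -> 'example.org' (fewer than three labels)
```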

src/bundle_download.py (new executable file)

#!/usr/bin/env python3

import sys
import common_lib as mylib


def download_info(bundle_id, lang, force=False):
    if force or not mylib.meta_json_exists(bundle_id, lang):
        url = 'https://itunes.apple.com/lookup?bundleId={}&country={}'.format(
            bundle_id, lang.upper())
        json = mylib.download(url, isJSON=True)
        json = json['results'][0]
        # delete unused keys to save on storage
        for key in ['supportedDevices', 'releaseNotes', 'description',
                    'screenshotUrls']:
            try:
                del(json[key])
            except KeyError:
                continue
        mylib.json_write_meta(bundle_id, json, lang)


def download_icon(bundle_id, force=False, langs=['us', 'de']):
    # icon_file = mylib.path_data_app(bundle_id, 'icon.png')
    icon_file = mylib.path_out_app(bundle_id, 'icon.png')
    if force or not mylib.file_exists(icon_file):
        json = None
        for lang in langs:
            if not json:
                json = mylib.json_read_meta(bundle_id, lang)
        mylib.download_file(json['artworkUrl100'], icon_file)


def download(bundle_id, force=False):
    if not mylib.valid_bundle_id(bundle_id):
        mylib.err('apple-download', 'invalid id: ' + bundle_id)
        return

    mylib.printf(' {} => '.format(bundle_id))
    for lang in ['us', 'de']:
        try:
            mylib.printf(lang)
            download_info(bundle_id, lang, force=force)
            mylib.printf('[✔] ')
        except Exception:
            mylib.printf('[✘] ')
            mylib.err('apple-download', 'json {}: {}'.format(
                lang.upper(), bundle_id), logOnly=True)
    try:
        mylib.printf('icon')
        download_icon(bundle_id, force=force)
        mylib.printf('[✔] ')
    except Exception:
        mylib.printf('[✘] ')
        mylib.err('apple-download', 'img for ' + bundle_id, logOnly=True)
    print('')  # end printf line


def process(bundle_ids, force=False):
    print('downloading bundle info ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())

    for bid in bundle_ids:
        download(bid, force=force)
    print('')


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) > 0:
        process(args)
    else:
        # process(['*'], force=False)
        mylib.usage(__file__, '[bundle_id] [...]')
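For orientation, a hedged sketch of what ends up in the metadata cache for a hypothetical bundle id: `download_info()` queries the iTunes lookup endpoint (e.g. `https://itunes.apple.com/lookup?bundleId=com.apple.notes&country=DE`), keeps `results[0]` minus the bulky keys deleted above, and writes it to `data/com/apple/notes/info_de.json`. Later steps read only two of its fields:

```python
# Abbreviated, hypothetical shape of info_de.json after download_info():
info = {
    'trackCensoredName': 'Notes',                                # app name shown by html_index.py
    'artworkUrl100': 'https://example.invalid/AppIcon100.png',   # icon URL fetched by download_icon()
    # ...all remaining App Store fields stay in the cached file
}
```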

src/bundle_import.py (new executable file)

#!/usr/bin/env python3

import os
import common_lib as mylib


def next_path(path_pattern):
    # find the lowest free index: grow exponentially, then binary search the gap
    i = 1
    while os.path.exists(path_pattern % i):
        i = i * 2
    a, b = (i // 2, i)
    while a + 1 < b:
        c = (a + b) // 2  # interval midpoint
        a, b = (c, b) if os.path.exists(path_pattern % c) else (a, c)
    return path_pattern % b


def process():
    print('checking incoming files ...')
    prefix = mylib.path_len(mylib.path_data())
    needs_update = set()
    for fname, jdata in mylib.enum_newly_added():
        try:
            bundle_id = jdata['app-bundle'].strip()
            if mylib.valid_bundle_id(bundle_id):
                dest = mylib.path_data_app(bundle_id)
                needs_update.add(bundle_id)
            else:
                dest = mylib.path_data('_manually')
                # needs_update.add('_manually')

            mylib.mkdir(dest)
            dest_file = next_path(mylib.path_add(dest, 'id_%s.json'))
            mylib.mv(fname, dest_file, printOmitPrefix=prefix)
        except KeyError:
            mylib.err('json-import', 'malformed json: ' + fname)
    print('done.')
    print('')
    return needs_update
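`next_path` finds the lowest free index for a filename pattern by doubling the index until it hits a gap and then binary-searching inside that gap. A short walk-through with a hypothetical folder:

```python
from bundle_import import next_path

# If data/com/apple/notes/ already holds id_1.json and id_2.json, the doubling
# loop probes id_1, id_2, id_4 and stops at the missing id_4; the binary search
# then narrows the interval (2, 4) to the first free index and the call returns
# 'data/com/apple/notes/id_3.json'.
print(next_path('data/com/apple/notes/id_%s.json'))
```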

src/common_lib.py (new executable file)

#!/usr/bin/env python3

import os
import re
import glob
import json
import shutil
import logging
from pathlib import Path
import urllib.request as curl

base_dir = os.path.realpath(os.path.join(
    os.path.dirname(os.path.realpath(__file__)), os.pardir))


# Paths

def path_add(base, *parts):
    return os.path.join(base, *parts)


def path_root(*parts):
    return os.path.join(base_dir, *parts)


def path_data(*path_components):
    return path_root('data', *path_components)


def path_data_app(bundle_id, filename=None):
    pth = path_root('data', *bundle_id.split('.'))
    return path_add(pth, filename) if filename else pth


def path_out(*path_components):
    return path_root('out', *path_components)


def path_out_app(bundle_id, filename=None):
    pth = path_root('out', 'app', bundle_id)
    return path_add(pth, filename) if filename else pth


def path_len(path, isDir=True):
    # prefix length incl. trailing separator, used to print paths relative to a base dir
    return len(path) + (len(os.sep) if isDir else 0)


# Templates

def template(html_file):
    return path_root('templates', html_file)


def template_with_base(content, title=None):
    with open(template('base.html'), 'r') as fp:
        return fp.read().replace(
            '#_TITLE_#', title + ' – ' if title else '').replace(
            '#_CONTENT_#', content)


# Other

regex_bundle_id = re.compile(r'^[A-Za-z0-9\.\-]{1,155}$')
logging.basicConfig(filename=os.path.join(os.pardir, "error.log"),
                    format='%(asctime)s %(message)s',
                    filemode='a')
logger = logging.getLogger()


def usage(_file_, params=''):
    print(' usage: ' + os.path.basename(_file_) + ' ' + params)


def valid_bundle_id(bundle_id):
    return regex_bundle_id.match(bundle_id)


def err(scope, msg, logOnly=False):
    logger.error('[{}] {}'.format(scope, msg))
    if not logOnly:
        print(' [ERROR] ' + msg)


def printf(msg):
    print(msg, end='', flush=True)


# Filesystem

def mkdir(path):
    Path(path).mkdir(parents=True, exist_ok=True)


def mv(path, to, printOmitPrefix=None):
    if printOmitPrefix:
        print(' mv ' + path[printOmitPrefix:] + ' -> ' + to[printOmitPrefix:])
    Path(path).rename(to)


def rm(path):
    shutil.rmtree(path)


def dir_exists(path):
    return os.path.isdir(path)


def file_exists(path):
    # empty files count as missing
    return os.path.isfile(path) and os.path.getsize(path) > 0


def meta_json_exists(bundle_id, lang):
    return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang)))


# Download

def download(url, isJSON=False):
    req = curl.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with curl.urlopen(req) as response:
        data = response.read()
        return json.loads(data.decode()) if isJSON else data


def download_file(url, path):
    curl.urlretrieve(url, path)


# Enumerator

def enum_newly_added():
    for fname in glob.glob(path_data('_in', '*.json')):
        with open(fname, 'r') as fp:
            yield fname, json.load(fp)


def enum_appids():
    for x in glob.glob(path_out_app('*')):
        yield os.path.basename(x)


def enum_jsons(bundle_id):
    for fname in glob.glob(path_data_app(bundle_id, 'id_*.json')):
        with open(fname, 'r') as fp:
            yield fname, json.load(fp)


def enum_data_appids():
    data_root = path_data()
    prfx = path_len(data_root)
    for path, dirs, files in os.walk(data_root):
        if 'combined.json' in files:
            yield path[prfx:].replace(os.sep, '.')


# JSON read

def json_read(path):
    with open(path, 'r') as fp:
        return json.load(fp)


def json_read_combined(bundle_id):
    return json_read(path_data_app(bundle_id, 'combined.json'))


def json_read_meta(bundle_id, lang):
    return json_read(path_data_app(bundle_id, 'info_{}.json'.format(lang)))


# JSON write

def json_write(path, obj, pretty=False):
    with open(path, 'w') as fp:
        json.dump(obj, fp, indent=2 if pretty else None, sort_keys=pretty)


def json_write_combined(bundle_id, obj):
    fname = path_data_app(bundle_id, 'combined.json')
    json_write(fname, obj, pretty=True)


def json_write_meta(bundle_id, obj, lang):
    fname = path_data_app(bundle_id, 'info_{}.json'.format(lang))
    json_write(fname, obj, pretty=True)
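The path helpers encode the repository layout: `path_data_app` splits the bundle id into nested folders under `data/`, while `path_out_app` keeps the dotted id as a single folder under `out/app/`. A quick sketch with a hypothetical bundle id, where `<repo>` stands for `base_dir`:

```python
import common_lib as mylib

mylib.path_data_app('com.apple.notes')              # -> <repo>/data/com/apple/notes
mylib.path_data_app('com.apple.notes', 'a.json')    # -> <repo>/data/com/apple/notes/a.json
mylib.path_out_app('com.apple.notes', 'icon.png')   # -> <repo>/out/app/com.apple.notes/icon.png
```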

src/html_bundle.py (new executable file)

#!/usr/bin/env python3

import sys
import common_lib as mylib
import matplotlib
import matplotlib.pyplot as plt

matplotlib.use('Agg')  # disable interactive mode


def sort_dict(count_dict):
    sorted_count = sorted(count_dict.items(), key=lambda x: (-x[1], x[0]))
    names = ['{} ({})'.format(*x) for x in sorted_count]
    sizes = [x[1] for x in sorted_count]
    return names, sizes


def gen_graph(count_dict, outfile):
    names, sizes = sort_dict(count_dict)
    pie1, _ = plt.pie(sizes, labels=names)
    plt.setp(pie1, width=0.5, edgecolor='white')
    plt.subplots_adjust(left=0, right=1, top=0.7, bottom=0.3)
    plt.savefig(outfile, bbox_inches='tight', pad_inches=0)  # transparent=True
    plt.close()


def seconds_to_time(seconds):
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)


def gen_domain_tags(unsorted_dict):
    for x in sorted(unsorted_dict):
        yield '<i>{}</i>'.format(x)


def gen_html(bundle_id, obj):
    return mylib.template_with_base(f'''
<h2>{obj['name']}</h2>
<div id="meta">
  <table>
    <tr><td>Bundle-id:</td><td>{
        bundle_id
    }</td></tr>
    <tr><td>Number of recordings:</td><td>{
        obj['#rec']
    }</td></tr>
    <tr><td>Total number of logs:</td><td>{
        obj['#logs']
    }</td></tr>
    <tr><td>Cumulative recording time:</td><td>{
        seconds_to_time(obj['rec-total'])
    }</td></tr>
    <tr><td>Average recording time:</td><td>{
        round(obj['rec-total'] / obj['#rec'], 1)
    } s</td></tr>
  </table>
</div>
<h3>Connections</h3>
<div id="connections">
  <table>
    <tr><td>Domains:</td><td>{
        ''.join(gen_domain_tags(obj['uniq_pardom']))
    }</td></tr>
    <tr><td>Subdomains:</td><td>{
        ''.join(gen_domain_tags(obj['uniq_subdom']))
    }</td></tr>
    <tr><td>Known Trackers:</td><td>{
        '...'
    }</td></tr>
  </table>
  <figure><img src="par.svg"></figure>
  <figure><img src="sub.svg"></figure>
</div>''', title=obj['name'])


def make_bundle_out(bundle_id):
    jdata = mylib.json_read_combined(bundle_id)
    out_dir = mylib.path_out_app(bundle_id)
    needs_update_index = False
    if not mylib.dir_exists(out_dir):
        needs_update_index = True
        mylib.mkdir(out_dir)
    try:
        gen_graph(jdata['total_subdom'], mylib.path_add(out_dir, 'sub.svg'))
        gen_graph(jdata['total_pardom'], mylib.path_add(out_dir, 'par.svg'))
    except KeyError:
        mylib.err('bundle-generate-page', 'skip: ' + bundle_id)

    with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp:
        fp.write(gen_html(bundle_id, jdata))
    return needs_update_index


def process(bundle_ids):
    print('generating html pages ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_appids())

    ids_new_in_index = set()
    for bid in bundle_ids:
        print(' ' + bid)
        if make_bundle_out(bid):
            ids_new_in_index.add(bid)
    print('')
    return ids_new_in_index


if __name__ == '__main__':
    args = sys.argv[1:]
    if len(args) > 0:
        process(args)
    else:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
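`sort_dict` orders the counted domains by descending count (name as tie-breaker) and builds the pie-chart labels. With hypothetical counts:

```python
from html_bundle import sort_dict

names, sizes = sort_dict({'cdn.example.com': 2, 'api.example.com': 5})
# names == ['api.example.com (5)', 'cdn.example.com (2)']
# sizes == [5, 2]
```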

src/html_index.py (new executable file)

#!/usr/bin/env python3

import common_lib as mylib


def gen_obj(bundle_id):
    if mylib.file_exists(mylib.path_out_app(bundle_id, 'icon.png')):
        icon = '/app/{0}/icon.png'.format(bundle_id)
    else:
        icon = '/static/app-template.svg'
    try:
        name = mylib.json_read_meta(bundle_id, 'de')['trackCensoredName']
    except Exception:
        name = '< App-Name >'
    return {'id': bundle_id, 'name': name, 'img': icon}


def gen_entry(obj):
    return '''
<a href="/app/{id}/index.html">
  <div>
    <img src="{img}" width="100" height="100">
    <span class="name">{name}</span><br />
    <span class="detail">{id}</span>
  </div>
</a>'''.format(**obj)


def gen_pager(current, total):

    def mklink(i, name, active=False):
        clss = ' class="active"' if active else ''
        return '<a href="./{}"{}>{}</a>'.format(i, clss, name)

    links = ''
    # if current > 1:
    #     links += mklink(current - 1, 'Previous')
    start = max(1, current - 5)
    for i in range(start, min(total, start + 10) + 1):
        links += mklink(i, i, active=i == current)
    # if current < total:
    #     links += mklink(current + 1, 'Next')
    return '<div id="pagination">{}</div>'.format(links)


def gen_page(arr, base, page_id=1, total=1):
    path = mylib.path_add(base, str(page_id))
    mylib.mkdir(path)
    with open(mylib.path_add(path, 'index.html'), 'w') as fp:
        content = ''.join([gen_entry(x) for x in arr])
        pagination = gen_pager(page_id, total)  # if total > 1 else ''
        fp.write(mylib.template_with_base('''
<h2>List of app recordings (A–Z)</h2>
<div id="app-toc">
{}
</div>
{}'''.format(content, pagination), title="Index"))


def process(per_page=60):
    print('generating app-index ...')
    index_dir = mylib.path_out('index', 'page')
    mylib.rm(index_dir)
    mylib.mkdir(index_dir)

    apps = [gen_obj(x) for x in mylib.enum_appids()]
    apps_total = len(apps)
    pages_total, rest = divmod(apps_total, per_page)
    if rest > 0:
        pages_total += 1
    print(' {} apps'.format(apps_total))
    print(' {} pages'.format(pages_total))

    apps_sorted = sorted(apps, key=lambda x: (x['name'], x['id']))
    for x in range(1, pages_total + 1):
        start = (x - 1) * per_page
        batch = apps_sorted[start:start + per_page]
        gen_page(batch, index_dir, x, pages_total)
    print('')


if __name__ == '__main__':
    process()
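`gen_pager` renders a window of at most eleven page links around the current page and marks the current one as active, for example:

```python
from html_index import gen_pager

gen_pager(2, 3)
# -> '<div id="pagination"><a href="./1">1</a>'
#    '<a href="./2" class="active">2</a><a href="./3">3</a></div>'
```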

src/html_root.py (new executable file)

#!/usr/bin/env python3

import common_lib as mylib


def gen_root():
    with open(mylib.path_out('index.html'), 'w') as fp:
        fp.write(mylib.template_with_base('''
<h2>About</h2>
<p class="squeeze">
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<a id="get-appcheck" href="https://testflight.apple.com/join/9jjaFeHO">
  <img src="/static/appcheck.svg" alt="app-icon" width="30" height="30">
  <p>
    Get the app and contribute.<br />
    Join the TestFlight Beta.
  </p>
</a>{}'''.format('')))


def process():
    print('generating root html ...')
    gen_root()  # root index.html


if __name__ == '__main__':
    process()

src/main.py (new executable file)

#!/usr/bin/env python3

import common_lib as mylib
import bundle_import
import bundle_combine
import bundle_download
import html_root
import html_index
import html_bundle


def reset_new():
    print("RESET json files ...")
    prefix = mylib.path_len(mylib.path_data())
    for bid in mylib.enum_appids():
        for src, _ in mylib.enum_jsons(bid):
            frmt = mylib.path_add(mylib.path_data('_in', '%s.json'))
            dest = bundle_import.next_path(frmt)
            mylib.mv(src, dest, printOmitPrefix=prefix)
    print('')


def del_id(bundle_ids):
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_appids())

    for bid in bundle_ids:
        dest = mylib.path_out_app(bid)
        if mylib.dir_exists(dest):
            mylib.rm(dest)
    html_index.process()


def full_chain(force=False):
    bundle_ids = bundle_import.process()
    if force:
        bundle_ids = list(mylib.enum_data_appids())
    if len(bundle_ids) > 0:
        bundle_combine.process(bundle_ids)
        new_ids = html_bundle.process(bundle_ids)
        if len(new_ids) > 0:
            bundle_download.process(new_ids)
            html_index.process()
            html_root.process()
    else:
        print('none to import, not rebuilding index')


def process():
    # reset_new()
    # del_id(['*'])
    full_chain(force=False)


process()