From 996e504a9ff63ca6a1ab30b6c6adfda1d617ef95 Mon Sep 17 00:00:00 2001
From: relikd
Date: Mon, 26 Feb 2024 22:03:19 +0100
Subject: [PATCH] Initial

---
 .gitignore                    |   2 +
 README.md                     |  98 ++++
 apple-touch-icon.png          | Bin 0 -> 1194 bytes
 favicon.ico                   | Bin 0 -> 888 bytes
 favicon.svg                   |  11 +
 index.html                    |  67 ++++
 ipa_archive.py                | 582 ++++++++++++++++++++++++++++++++++
 script.js                     | 289 +++++++++++++++++
 style.css                     |  93 ++++++
 tools/check_error_no_plist.sh |  10 +
 tools/check_missing_img.sh    |  10 +
 tools/convert_plist.sh        |  21 ++
 tools/image_optim.sh          |  52 +++
 tools/plist_server.py         |  59 ++++
 tools/plist_server/index.php  |  24 ++
 15 files changed, 1318 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 apple-touch-icon.png
 create mode 100644 favicon.ico
 create mode 100644 favicon.svg
 create mode 100644 index.html
 create mode 100755 ipa_archive.py
 create mode 100644 script.js
 create mode 100644 style.css
 create mode 100755 tools/check_error_no_plist.sh
 create mode 100755 tools/check_missing_img.sh
 create mode 100755 tools/convert_plist.sh
 create mode 100755 tools/image_optim.sh
 create mode 100755 tools/plist_server.py
 create mode 100644 tools/plist_server/index.php

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..0bf857cd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
.DS_Store
ipa_download/

diff --git a/README.md b/README.md
new file mode 100644
index 00000000..44a1b668
--- /dev/null
+++ b/README.md
@@ -0,0 +1,98 @@
# The Grand IPA Archive

Hello, this project aims to provide a searchable and filterable index for .ipa files.
None of the linked files are mine, nor am I involved in any capacity in the referenced projects.
I merely wrote the crawler to index IPA files in various [Archive.org](https://archive.org) collections.
The list of indexed collections can be found at [data/urls.json](data/urls.json).


## Using the webpage

You can add the IPA Archive webpage to your homescreen.
Note, however, that each time you tap the app icon, it will reload the whole database and clear your previously entered data.
To prevent that, use Safari to jump back to your search results.
The homescreen icon is still useful as a bookmark though ;-)

Additionally, your configuration is saved in the URL.
For example, if you have a 1st-generation iPad, you can select device "iPad" and maxOS "5.1.1".
Then click search and save that URL to your homescreen.
(Or wait until you have configured your Plist server and save that URL instead.)


## TODO

- Reindexing of previous URLs (should remove dead links and add new ones)
- Periodic check on outdated URLs (see previous)


## Requirements

- `ipa_archive.py` has a dependency on [RemoteZip](https://github.com/gtsystem/python-remotezip) (`pip install remotezip`)
- `image_optim.sh` uses [ImageOptim](https://github.com/ImageOptim/ImageOptim) (and probably requires a Mac)
- The [Plist Generator server](#starting-plist-server) needs either Python or PHP


## General workflow

To add files to the archive, follow these steps (a full example session follows the list):

1. `python3 ipa_archive.py add URL`
2. `python3 ipa_archive.py run`
3. If any of the URLs failed, check whether they can be fixed (though most likely the .ipa zip file itself is broken).
   - If you were able to fix any files, run `python3 ipa_archive.py err reset` to try again (this will also print the errors again)
   - If some files are unfixable, run `python3 ipa_archive.py set err ID1 ID2` to ignore them
4. `./tools/image_optim.sh` (this will convert all .png files to .jpg)
5. `python3 ipa_archive.py export json`
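For reference, a complete session could look like this (the collection URL and the IDs are placeholders):

```sh
python3 ipa_archive.py add 'https://archive.org/details/example-ipa-collection'
python3 ipa_archive.py run
# inspect the reported errors, then either ...
python3 ipa_archive.py err reset       # retry entries you were able to fix
python3 ipa_archive.py set err 17 42   # ... or ignore the unfixable ones
./tools/image_optim.sh
python3 ipa_archive.py export json
```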
Handling plist errors (e.g., `Info.plist` in a JSON-like format):
- `./tools/convert_plist.sh 21968` (converts the plist to XML)
- `./ipa_archive.py get img 21968` (re-extracts the app icon)


## Database schema

The column `done` is encoded as follows:
- `0` (queued, needs processing)
- `1` (done)
- `3` (error, maybe fixable, needs attention)
- `4` (error, unfixable, ignore in export)


## Starting Plist Server

You need to start the plist generator service on a network location that is accessible to your iDevice.
That can be, for example, your local machine, which is accessible through your home network (LAN).
You will need to determine the IP address of the hosting machine.
You can use either Python or PHP to host the service.

(It is sufficient to copy and execute one of the two server files, either the Python or the PHP one.)


### ... with Python

With Python, the IP address *should* be determined automatically.
After starting the server:

```sh
python3 tools/plist_server.py
```

it will print out something like `Server started http://192.168.0.1:8026`.
Use this address on the IPA Archive webpage.
If the IP starts with `127.x.x.x` or `10.x.x.x`, you will need to find the IP address manually and use that instead.


### ... with PHP

Similar to Python, you start the server with:

```sh
php -S 0.0.0.0:8026 -t tools/plist_server
```

However, you have to find your local IP address manually (Mac: `ipconfig getifaddr en0`).
Note that we use `0.0.0.0` instead of localhost to make the server available to other network devices.
If you are inside the `plist_server` folder, you can omit the `-t` flag.

For the IPA Archive webpage, you should use `http://192.168.0.1:8026` (with your own IP address).
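To verify that the server is reachable before entering it on the webpage, you can request a manifest manually. The `d` parameter is a base64-encoded JSON object with the keys `u` (ipa URL), `i` (icon URL), `b` (bundle id), `v` (version), and `n` (title); the values below are placeholders:

```sh
D=$(printf '%s' '{"u":"https://example.org/a.ipa","i":"https://example.org/a.jpg","b":"com.example.a","v":"1.0","n":"Example"}' | base64 | tr -d '\n')
curl "http://192.168.0.1:8026/?d=$D"
```

If everything works, the response is an XML plist manifest. (If the encoded payload contains `+` or `/`, URL-encode it first.)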
diff --git a/apple-touch-icon.png b/apple-touch-icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..9a14e892e59b7c90426c6a86881f7bcda4270799
GIT binary patch
literal 1194

diff --git a/favicon.ico b/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..f9b6aad2c0c33cb9eca360476b5c12b670fcea51
GIT binary patch
literal 888

diff --git a/favicon.svg b/favicon.svg
new file mode 100644
--- /dev/null
+++ b/favicon.svg
@@ -0,0 +1,11 @@

diff --git a/index.html b/index.html
new file mode 100644
index 00000000..ebcedf41
--- /dev/null
+++ b/index.html
@@ -0,0 +1,67 @@
IPA Archive
IPA Archive
JavaScript disabled?
\ No newline at end of file

diff --git a/ipa_archive.py b/ipa_archive.py
new file mode 100755
index 00000000..5b3c80f6
--- /dev/null
+++ b/ipa_archive.py
@@ -0,0 +1,582 @@
#!/usr/bin/env python3
from typing import TYPE_CHECKING, Iterable
from multiprocessing import Pool
from pathlib import Path
from urllib.parse import quote
from urllib.request import Request, urlopen, urlretrieve
from argparse import ArgumentParser
from sys import stderr
import plistlib
import sqlite3
import json
import gzip
import os
import re

import warnings
with warnings.catch_warnings():  # hide macOS LibreSSL warning
    warnings.filterwarnings('ignore')
    from remotezip import RemoteZip  # pip install remotezip

if TYPE_CHECKING:
    from zipfile import ZipInfo


USE_ZIP_FILESIZE = False
re_info_plist = re.compile(r'Payload/([^/]+)/Info\.plist')
# re_links = re.compile(r'''<a[^>]*href=["']([^>]+\.ipa)["'][^>]*>''')
re_archive_url = re.compile(
    r'https?://archive.org/(?:metadata|details|download)/([^/]+)(?:/.*)?')
CACHE_DIR = Path(__file__).parent / 'data'
CACHE_DIR.mkdir(exist_ok=True)


def main():
    CacheDB().init()
    parser = ArgumentParser()
    cli = parser.add_subparsers(metavar='command', dest='cmd', required=True)

    cmd = cli.add_parser('add', help='Add urls to cache')
    cmd.add_argument('urls', metavar='URL', nargs='+',
                     help='Search URLs for .ipa links')

    cmd = cli.add_parser('run', help='Download and process pending urls')
    cmd.add_argument('-force', '-f', action='store_true',
                     help='Reindex local data / populate DB. '
                          'Make sure to export fsize before!')

    cmd = cli.add_parser('export', help='Export data')
    cmd.add_argument('export_type', choices=['json', 'fsize'],
                     help='Export to json or temporary-filesize file')

    cmd = cli.add_parser('err', help='Handle problematic entries')
    cmd.add_argument('err_type', choices=['reset'], help='Set done=0 to retry')

    cmd = cli.add_parser('get', help='Lookup value')
    cmd.add_argument('get_type', choices=['url', 'img', 'ipa'],
                     help='Get data field or download image.')
    cmd.add_argument('pk', metavar='PK', type=int,
                     nargs='+', help='Primary key')

    cmd = cli.add_parser('set', help='(Re)set value')
    cmd.add_argument('set_type', choices=['err'], help='Data field/column')
    cmd.add_argument('pk', metavar='PK', type=int,
                     nargs='+', help='Primary key')

    args = parser.parse_args()

    if args.cmd == 'add':
        for url in args.urls:
            crawler(url)
        print('done.')

    elif args.cmd == 'run':
        if args.force:
            print('Resetting done state ...')
            CacheDB().setAllUndone(whereDone=1)
        processPending()

    elif args.cmd == 'err':
        if args.err_type == 'reset':
            print('Resetting error state ...')
            CacheDB().setAllUndone(whereDone=3)

    elif args.cmd == 'export':
        if args.export_type == 'json':
            export_json()
        elif args.export_type == 'fsize':
            export_filesize()

    elif args.cmd == 'get':
        DB = CacheDB()
        if args.get_type == 'url':
            for pk in args.pk:
                print(pk, ':', DB.getUrl(pk))
        elif args.get_type == 'img':
            for pk in args.pk:
                url = DB.getUrl(pk)
                print(pk, ': load image', url)
                loadIpa(pk, url, overwrite=True, image_only=True)
        elif args.get_type == 'ipa':
            dir = Path('ipa_download')
            dir.mkdir(exist_ok=True)
            for pk in args.pk:
                url = DB.getUrl(pk)
                print(pk, ': load ipa', url)
                urlretrieve(url, dir / f'{pk}.ipa', printProgress)
                print(end='\r')

    elif args.cmd == 'set':
        DB = CacheDB()
        if args.set_type == 'err':
            for pk in args.pk:
                print(pk, ': set done=4')
DB.setPermanentError(pk) + + +############################################### +# Database +############################################### + +class CacheDB: + def __init__(self) -> None: + self._db = sqlite3.connect(CACHE_DIR / 'ipa_cache.db') + self._db.execute('pragma busy_timeout=5000') + + def init(self): + self._db.execute(''' + CREATE TABLE IF NOT EXISTS urls( + pk INTEGER PRIMARY KEY, + url TEXT NOT NULL UNIQUE + ); + ''') + self._db.execute(''' + CREATE TABLE IF NOT EXISTS idx( + pk INTEGER PRIMARY KEY, + base_url INTEGER NOT NULL, + path_name TEXT NOT NULL, + done INTEGER DEFAULT 0, + fsize INTEGER DEFAULT 0, + + min_os INTEGER DEFAULT NULL, + platform INTEGER DEFAULT NULL, + title TEXT DEFAULT NULL, + bundle_id TEXT DEFAULT NULL, + version TEXT DEFAULT NULL, + + UNIQUE(base_url, path_name) ON CONFLICT ABORT, + FOREIGN KEY (base_url) REFERENCES urls (pk) ON DELETE RESTRICT + ); + ''') + + def __del__(self) -> None: + self._db.close() + + # insert URLs + + def insertBaseUrl(self, base: str) -> int: + try: + x = self._db.execute('INSERT INTO urls (url) VALUES (?);', [base]) + self._db.commit() + return x.lastrowid # type: ignore + except sqlite3.IntegrityError: + x = self._db.execute('SELECT pk FROM urls WHERE url = ?;', [base]) + return x.fetchone()[0] + + def insertIpaUrls(self, entries: 'Iterable[tuple[int, str, int]]') -> int: + self._db.executemany(''' + INSERT OR IGNORE INTO idx (base_url, path_name, fsize) VALUES (?,?,?); + ''', entries) + self._db.commit() + return self._db.total_changes + + def getUrl(self, uid: int) -> str: + x = self._db.execute('''SELECT url, path_name FROM idx + INNER JOIN urls ON urls.pk=base_url WHERE idx.pk=?;''', [uid]) + base, path = x.fetchone() + return base + '/' + quote(path) + + # Export JSON + + def jsonUrlMap(self) -> 'dict[int, str]': + x = self._db.execute('SELECT pk, url FROM urls') + rv = {} + for pk, url in x: + rv[pk] = url + return rv + + def enumJsonIpa(self, *, done: int) -> Iterable[tuple]: + yield from self._db.execute(''' + SELECT pk, platform, IFNULL(min_os, 0), + TRIM(IFNULL(title, + REPLACE(path_name,RTRIM(path_name,REPLACE(path_name,'/','')),'') + )) as tt, IFNULL(bundle_id, ""), + version, base_url, path_name, fsize / 1024 + FROM idx WHERE done=? + ORDER BY tt COLLATE NOCASE, min_os, platform, version;''', [done]) + + # Filesize + + def enumFilesize(self) -> Iterable[tuple]: + yield from self._db.execute('SELECT pk, fsize FROM idx WHERE fsize>0;') + + def setFilesize(self, uid: int, size: int) -> None: + if size > 0: + self._db.execute('UPDATE idx SET fsize=? WHERE pk=?;', [size, uid]) + self._db.commit() + + # Process Pending + + def count(self, *, done: int) -> int: + x = self._db.execute('SELECT COUNT() FROM idx WHERE done=?;', [done]) + return x.fetchone()[0] + + def getPendingQueue(self, *, done: int, batchsize: int) \ + -> 'list[tuple[int, str]]': + x = self._db.execute('''SELECT idx.pk, + url || "/" || REPLACE(REPLACE(path_name, '#', '%23'), '?', '%3F') + FROM idx INNER JOIN urls ON urls.pk=base_url + WHERE done=? LIMIT ?;''', [done, batchsize]) + return x.fetchall() + + def setAllUndone(self, *, whereDone: int) -> None: + self._db.execute('UPDATE idx SET done=0 WHERE done=?;', [whereDone]) + self._db.commit() + + # Finalize / Postprocessing + + def setError(self, uid: int, *, done: int) -> None: + self._db.execute('UPDATE idx SET done=? WHERE pk=?;', [done, uid]) + self._db.commit() + + def setPermanentError(self, uid: int) -> None: + ''' + Set done=4 and all file related columns to NULL. 
Will also delete any plist and image files for {uid} in CACHE_DIR
        '''
        self._db.execute('''
            UPDATE idx SET done=4, min_os=NULL, platform=NULL, title=NULL,
            bundle_id=NULL, version=NULL WHERE pk=?;''', [uid])
        self._db.commit()
        for ext in ['.plist', '.png', '.jpg']:
            fname = diskPath(uid, ext)
            if fname.exists():
                os.remove(fname)

    def setDone(self, uid: int) -> None:
        plist_path = diskPath(uid, '.plist')
        if not plist_path.exists():
            return
        with open(plist_path, 'rb') as fp:
            try:
                plist = plistlib.load(fp)
            except Exception as e:
                print(f'ERROR: [{uid}] PLIST: {e}', file=stderr)
                self.setError(uid, done=3)
                return

        bundleId = plist.get('CFBundleIdentifier')
        title = plist.get('CFBundleDisplayName') or plist.get('CFBundleName')
        version = str(plist.get('CFBundleVersion', ''))
        v_short = str(plist.get('CFBundleShortVersionString', ''))
        if not version:
            version = v_short
        if version != v_short and v_short:
            version = f'{version} ({v_short})'
        minOS = [int(x) for x in plist.get('MinimumOSVersion', '0').split('.')]
        minOS += [0, 0, 0]  # ensures at least 3 components are given
        platforms = sum(1 << int(x) for x in plist.get('UIDeviceFamily', []))
        if not platforms and minOS[0] in [0, 1, 2, 3]:
            platforms = 1 << 1  # fallback to iPhone for old versions

        self._db.execute('''
            UPDATE idx SET
            done=1, min_os=?, platform=?, title=?, bundle_id=?, version=?
            WHERE pk=?;''', [
            (minOS[0] * 10000 + minOS[1] * 100 + minOS[2]) or None,
            platforms or None,
            title or None,
            bundleId or None,
            version or None,
            uid,
        ])
        self._db.commit()


###############################################
# [add] Process HTML link list
###############################################

def crawler(url: str) -> None:
    match = re_archive_url.match(url)
    if not match:
        print(f'[WARN] not an archive.org url. Ignoring "{url}"', file=stderr)
        return
    downloadListArchiveOrg(match.group(1))


def downloadListArchiveOrg(archiveId: str) -> None:
    baseUrl = f'https://archive.org/download/{archiveId}'
    baseUrlId = CacheDB().insertBaseUrl(baseUrl)
    json_file = CACHE_DIR / 'url_cache' / (str(baseUrlId) + '.json.gz')
    json_file.parent.mkdir(exist_ok=True)
    # store json for later
    if not json_file.exists():
        print(f'load: [{baseUrlId}] {baseUrl}')
        req = Request(f'https://archive.org/metadata/{archiveId}/files')
        req.add_header('Accept-Encoding', 'deflate, gzip')
        with urlopen(req) as page:
            with open(json_file, 'wb') as fp:
                while True:
                    block = page.read(8096)
                    if not block:
                        break
                    fp.write(block)
    # read saved json from disk
    with gzip.open(json_file, 'rb') as fp:
        data = json.load(fp)
    # process and add to DB
    entries = [(baseUrlId, x['name'], int(x.get('size', 0)))
               for x in data['result']
               if x['source'] == 'original' and x['name'].endswith('.ipa')]
    inserted = CacheDB().insertIpaUrls(entries)
    print(f'new links added: {inserted} of {len(entries)}')


###############################################
# [run] Process pending urls from DB
###############################################

def processPending():
    processed = 0
    with Pool(processes=8) as pool:
        while True:
            DB = CacheDB()
            pending = DB.count(done=0)
            batch = DB.getPendingQueue(done=0, batchsize=100)
            del DB
            if not batch:
                print('Queue empty. 
done.') + break + + batch = [(processed + i + 1, pending - i - 1, *x) + for i, x in enumerate(batch)] + + result = pool.starmap_async(procSinglePending, batch).get() + processed += len(result) + DB = CacheDB() + for uid, success in result: + fsize = onceReadSizeFromFile(uid) + if fsize: + DB.setFilesize(uid, fsize) + if success: + DB.setDone(uid) + else: + DB.setError(uid, done=3) + del DB + DB = CacheDB() + err_count = DB.count(done=3) + if err_count > 0: + print() + print('URLs with Error:', err_count) + for uid, url in DB.getPendingQueue(done=3, batchsize=10): + print(f' - [{uid}] {url}') + + +def procSinglePending(processed: int, pending: int, uid: int, url: str) \ + -> 'tuple[int, bool]': + humanUrl = url.split('archive.org/download/')[-1] + print(f'[{processed}|{pending} queued]: load[{uid}] {humanUrl}') + try: + return uid, loadIpa(uid, url) + except Exception as e: + print(f'ERROR: [{uid}] {e}', file=stderr) + return uid, False + + +def onceReadSizeFromFile(uid: int) -> 'int|None': + size_path = diskPath(uid, '.size') + if size_path.exists(): + with open(size_path, 'r') as fp: + size = int(fp.read()) + os.remove(size_path) + return size + return None + + +############################################### +# Process IPA zip +############################################### + +def loadIpa(uid: int, url: str, *, + overwrite: bool = False, image_only: bool = False) -> bool: + basename = diskPath(uid, '') + basename.parent.mkdir(exist_ok=True) + img_path = basename.with_suffix('.png') + plist_path = basename.with_suffix('.plist') + if not overwrite and plist_path.exists(): + return True + + with RemoteZip(url) as zip: + if USE_ZIP_FILESIZE: + filesize = zip.fp.tell() if zip.fp else 0 + with open(basename.with_suffix('.size'), 'w') as fp: + fp.write(str(filesize)) + + app_name = None + artwork = False + zip_listing = zip.infolist() + + for entry in zip_listing: + fn = entry.filename.lstrip('/') + plist_match = re_info_plist.match(fn) + if fn == 'iTunesArtwork': + extractZipEntry(zip, entry, img_path) + artwork = os.path.getsize(img_path) > 0 + elif plist_match: + app_name = plist_match.group(1) + if not image_only: + extractZipEntry(zip, entry, plist_path) + + # if no iTunesArtwork found, load file referenced in plist + if not artwork and app_name and plist_path.exists(): + with open(plist_path, 'rb') as fp: + icon_names = iconNameFromPlist(plistlib.load(fp)) + print(icon_names) + icon = expandImageName(zip_listing, app_name, icon_names) + print(icon) + if icon: + extractZipEntry(zip, icon, img_path) + + return plist_path.exists() + + +def extractZipEntry(zip: 'RemoteZip', zipInfo: 'ZipInfo', dest_filename: Path): + with zip.open(zipInfo) as src: + with open(dest_filename, 'wb') as tgt: + tgt.write(src.read()) + + +############################################### +# Icon name extraction +############################################### +RESOLUTION_ORDER = ['3x', '2x', '180', '167', '152', '120'] + + +def expandImageName( + zip_listing: 'list[ZipInfo]', appName: str, iconList: 'list[str]' +) -> 'ZipInfo|None': + for iconName in iconList + ['Icon', 'icon']: + zipPath = f'Payload/{appName}/{iconName}' + matchingNames = [x.filename.split('/', 2)[-1] for x in zip_listing + if x.filename.lstrip('/').startswith(zipPath)] + if len(matchingNames) > 0: + for bestName in sortedByResolution(matchingNames): + bestPath = f'Payload/{appName}/{bestName}' + for x in zip_listing: + if x.filename.lstrip('/') == bestPath and x.file_size > 0: + return x + return None + + +def 
unpackNameListFromPlistDict(bundleDict: 'dict|None') -> 'list[str]|None': + if not bundleDict: + return None + primaryDict = bundleDict.get('CFBundlePrimaryIcon', {}) + icons = primaryDict.get('CFBundleIconFiles') + if not icons: + singular = primaryDict.get('CFBundleIconName') + if singular: + return [singular] + return icons + + +def resolutionIndex(icon_name: str): + if 'small' in icon_name.lower(): + return 99 + for i, match in enumerate(RESOLUTION_ORDER): + if match in icon_name: + return i + return 50 + + +def sortedByResolution(icons: 'list[str]') -> 'list[str]': + icons.sort(key=resolutionIndex) + return icons + + +def iconNameFromPlist(plist: dict) -> 'list[str]': + # Check for CFBundleIcons (since 5.0) + icons = unpackNameListFromPlistDict(plist.get('CFBundleIcons')) + if not icons: + icons = unpackNameListFromPlistDict(plist.get('CFBundleIcons~ipad')) + if not icons: + # Check for CFBundleIconFiles (since 3.2) + icons = plist.get('CFBundleIconFiles') + if not icons: + # key found on iTunesU app + icons = plist.get('Icon files') + if not icons: + # Check for CFBundleIconFile (legacy, before 3.2) + icon = plist.get('CFBundleIconFile') # may be None + return [icon] if icon else [] + return sortedByResolution(icons) + + +############################################### +# [json] Export to json +############################################### + +def export_json(): + DB = CacheDB() + url_map = DB.jsonUrlMap() + maxUrlId = max(url_map.keys()) + submap = {} + total = DB.count(done=1) + with open(CACHE_DIR / 'ipa.json', 'w') as fp: + fp.write('[') + for i, entry in enumerate(DB.enumJsonIpa(done=1)): + if i % 113 == 0: + print(f'\rprocessing [{i}/{total}]', end='') + # if path_name is in a subdirectory, reindex URLs + if '/' in entry[7]: + baseurl = url_map[entry[6]] + sub_dir, sub_file = entry[7].split('/', 1) + newurl = baseurl + '/' + sub_dir + subIdx = submap.get(newurl, None) + if subIdx is None: + maxUrlId += 1 + submap[newurl] = maxUrlId + subIdx = maxUrlId + entry = list(entry) + entry[6] = subIdx + entry[7] = sub_file + + fp.write(json.dumps(entry, separators=(',', ':')) + ',\n') + fp.seek(max(fp.tell(), 3) - 2) + fp.write(']') + print('\r', end='') + print(f'write ipa.json: {total} entries') + + for newurl, newidx in submap.items(): + url_map[newidx] = newurl + with open(CACHE_DIR / 'urls.json', 'w') as fp: + fp.write(json.dumps(url_map, separators=(',\n', ':'))) + print(f'write urls.json: {len(url_map)} entries') + + +def export_filesize(): + ignored = 0 + written = 0 + for i, (uid, fsize) in enumerate(CacheDB().enumFilesize()): + size_path = diskPath(uid, '.size') + if not size_path.exists(): + with open(size_path, 'w') as fp: + fp.write(str(fsize)) + written += 1 + else: + ignored += 1 + if i % 113 == 0: + print(f'\r{written} files written. {ignored} ignored', end='') + print(f'\r{written} files written. {ignored} ignored. 
done.') + + +############################################### +# Helper +############################################### + +def diskPath(uid: int, ext: str) -> Path: + return CACHE_DIR / str(uid // 1000) / f'{uid}{ext}' + + +def printProgress(blocknum, bs, size): + percent = (blocknum * bs) / size + done = "#" * int(40 * percent) + print(f'\r[{done:<40}] {percent:.1%}', end='') + +# def b64e(text: str) -> str: +# return b64encode(text.encode('utf8')).decode('ascii') + + +if __name__ == '__main__': + main() diff --git a/script.js b/script.js new file mode 100644 index 00000000..b711d481 --- /dev/null +++ b/script.js @@ -0,0 +1,289 @@ +var DB = []; +var DB_result = []; +var baseUrls = {}; +var PER_PAGE = 30; +var plistGeneratorUrl = ''; // will append ?d= + +/* + * Init + */ + +function setMessage(msg) { + document.getElementById('content').innerHTML = msg; +} + +function loadFile(url, onErrFn, fn) { + try { + const xhr = new XMLHttpRequest(); + xhr.open('GET', url, true); + xhr.responseType = 'text'; + xhr.onload = function (e) { fn(e.target.response); }; + xhr.onerror = function (e) { onErrFn('Server or network error.'); }; + xhr.send(); + } catch (error) { + onErrFn(error); + } +} + +function loadDB() { + try { + loadConfig(); + } catch (error) { + alert(error); + } + setMessage('Loading base-urls ...'); + loadFile('data/urls.json', setMessage, function (data) { + baseUrls = JSON.parse(data); + setMessage('Loading database ...'); + loadFile('data/ipa.json', setMessage, function (data) { + DB = JSON.parse(data); + setMessage(DB.length); + setMessage('ready. Links in database: ' + DB.length); + }); + }); +} + +function loadConfig() { + const params = location.hash.substring(1).split('&'); + params.forEach(function (param) { + const pair = param.split('=', 2); + const key = pair[0]; + const value = pair[1]; + const input = document.getElementById(key); + if (input) { + input.value = value; + if (key == 'plistServer') { + setPlistGen(); + } + } + }); +} + +function saveConfig() { + const data = []; + NodeList.prototype.forEach = Array.prototype.forEach; // fix for < iOS 9.3 + document.querySelectorAll('input,select').forEach(function (e) { + if (e.value) { + data.push(e.id + '=' + e.value); + } + }); + this.location.hash = '#' + data.join('&'); +} + +/* + * Search + */ + +function applySearch() { + const term = document.getElementById('search').value.trim().toLowerCase(); + const bundle = document.getElementById('bundleid').value.trim().toLowerCase(); + const minos = document.getElementById('minos').value; + const maxos = document.getElementById('maxos').value; + const platform = document.getElementById('device').value; + + const minV = minos ? strToVersion(minos) : 0; + const maxV = maxos ? strToVersion(maxos) : 9999999; + const device = platform ? 
1 << platform : 255; // all flags + const lenBundle = bundle.length; + + // [7, 2,20200,"180","com.headcasegames.180","1.0",1,"180.ipa", 189930], + // [pk, platform, minOS, title, bundleId, version, baseUrl, pathName, size] + DB_result = []; + DB.forEach(function (ipa, i) { + if (ipa[2] < minV || ipa[2] > maxV || !(ipa[1] & device)) { + return; + } + if (bundle && ipa[4].substring(0, lenBundle).toLowerCase() !== bundle) { + return; + } + if (!term + || ipa[3].toLowerCase().indexOf(term) > -1 + || ipa[4].toLowerCase().indexOf(term) > -1 + || ipa[7].toLowerCase().indexOf(term) > -1 + ) { + DB_result.push(i); + } + }); +} + +function searchByBundleId(sender) { + document.getElementById('bundleid').value = sender.innerText; + searchIPA(); +} + +function searchIPA() { + applySearch(); + printIPA(); + saveConfig(); +} + +/* + * Output + */ + +function platformToStr(num) { + if (!num) { return '?'; } + return [ + num & (1 << 1) ? 'iPhone' : null, + num & (1 << 2) ? 'iPad' : null, + num & (1 << 3) ? 'TV' : null, + num & (1 << 4) ? 'Watch' : null, + ].filter(Boolean).join(', '); +} + +function versionToStr(num) { + if (!num) { return '?'; } + const major = Math.floor(num / 10000); + const minor = Math.floor(num / 100) % 100; + const patch = num % 100; + return major + '.' + minor + (patch ? '.' + patch : ''); +} + +function strToVersion(versionStr) { + const x = ((versionStr || '0') + '.0.0.0').split('.'); + return parseInt(x[0]) * 10000 + parseInt(x[1]) * 100 + parseInt(x[2]); +} + +function humanSize(size) { + var sizeIndex = 0; + while (size > 1024) { + size /= 1024; + sizeIndex += 1; + } + return size.toFixed(1) + ['kB', 'MB', 'GB'][sizeIndex]; +} + +function validUrl(url) { + return encodeURI(url).replace('#', '%23').replace('?', '%3F'); +} + +function entriesToDict(entry) { + const pk = entry[0]; + return { + pk: pk, + platform: entry[1], + minOS: entry[2], + title: entry[3], + bundleId: entry[4], + version: entry[5], + baseUrl: entry[6], + pathName: entry[7], + size: entry[8], + ipa_url: baseUrls[entry[6]] + '/' + entry[7], + img_url: 'data/' + Math.floor(pk / 1000) + '/' + pk + '.jpg', + } +} + +function entriesToStr(data) { + const template = document.getElementById('templates').querySelector('.entry').outerHTML; + var rv = ''; + for (var i = 0; i < data.length; i++) { + const entry = entriesToDict(DB[data[i]]); + rv += template + .replace('$IDX', data[i]) + .replace('$IMG', entry.img_url) + .replace('$TITLE', (entry.title || '?').replace('<', '<')) + .replace('$VERSION', entry.version) + .replace('$BUNDLEID', entry.bundleId) + .replace('$MINOS', versionToStr(entry.minOS)) + .replace('$PLATFORM', platformToStr(entry.platform)) + .replace('$SIZE', humanSize(entry.size)) + .replace('$URLNAME', entry.pathName.split('/').slice(-1)) // decodeURI + .replace('$URL', validUrl(entry.ipa_url)); + } + return rv; +} + +function printIPA(offset) { + if (!offset) { offset = 0; } + + const total = DB_result.length; + var content = '

Results: ' + total + '

'; + const page = Math.floor(offset / PER_PAGE); + const pages = Math.ceil(total / PER_PAGE); + if (pages > 1) { + content += paginationShort(page, pages); + } + content += entriesToStr(DB_result.slice(offset, offset + PER_PAGE)); + if (pages > 1) { + content += paginationShort(page, pages); + content += paginationFull(page, pages); + } + + document.getElementById('content').innerHTML = content; + window.scrollTo(0, 0); +} + +/* + * Pagination + */ + +function p(page) { + printIPA(page * PER_PAGE); +} + +function paginationShort(page, pages) { + return '
' + + '' + + '' + (page + 1) + ' / ' + pages + '' + + '' + + '
';
}

function paginationFull(page, pages) {
    var rv = '';
}

/*
 * Install on iDevice
 */

function setPlistGen() {
    const testURL = document.getElementById('plistServer').value;
    const scheme = testURL.slice(0, 7);
    if (scheme != 'http://' && scheme != 'https:/') {
        alert('URL must start with http:// or https://.');
        return;
    }
    loadFile(testURL + '?d=' + btoa('{"u":"1"}'), alert, function (data) {
        if (data.trim().slice(0, 6) != '

diff --git a/style.css b/style.css
new file mode 100644
--- /dev/null
+++ b/style.css
@@ -0,0 +1,93 @@
div {
    display: inline-block;
    margin: 8px 8px 8px 0;
}
.entry img {
    width: 74px;
    height: 74px;
    margin: 8px;
    border-radius: 17%;
    vertical-align: top;
}
.entry button {
    display: block;
    width: 74px;
    margin: 0 auto;
}

/* Pagination */
.shortpage {
    text-align: center;
    margin: 30px;
}
.shortpage>span {
    margin: 0 20px;
}
.shortpage>button {
    font-size: 1em;
}
.shortpage>button:first-child {
    float: left;
}
.shortpage>button:last-child {
    float: right;
}
#pagination>a, #pagination>b {
    display: inline-block;
    padding: 4px;
}
\ No newline at end of file

diff --git a/tools/check_error_no_plist.sh b/tools/check_error_no_plist.sh
new file mode 100755
index 00000000..b30f7975
--- /dev/null
+++ b/tools/check_error_no_plist.sh
@@ -0,0 +1,10 @@
#!/bin/bash
# Print all files where the error is set to permanent but a plist or image still exists.
cd "$(dirname "$0")" || exit

while read -r uid; do
    fname="data/$((uid/1000))/$uid"
    if [ -f "../$fname.plist" ]; then echo "$fname.plist"; fi
    if [ -f "../$fname.png" ]; then echo "$fname.png"; fi
    if [ -f "../$fname.jpg" ]; then echo "$fname.jpg"; fi
done < <(sqlite3 ../data/ipa_cache.db 'SELECT pk FROM idx WHERE done=4;')

diff --git a/tools/check_missing_img.sh b/tools/check_missing_img.sh
new file mode 100755
index 00000000..92a9d9fd
--- /dev/null
+++ b/tools/check_missing_img.sh
@@ -0,0 +1,10 @@
#!/bin/bash
# Find files where a plist exists but an image is missing (should be run after image_optim).
cd "$(dirname "$0")" || exit

for file in ../data/*/*.plist; do
    if [ ! -f "${file%.plist}.jpg" ]; then
        idx=${file##*/}
        echo "${idx%.*}"
    fi
done

diff --git a/tools/convert_plist.sh b/tools/convert_plist.sh
new file mode 100755
index 00000000..3bcde251
--- /dev/null
+++ b/tools/convert_plist.sh
@@ -0,0 +1,21 @@
#!/bin/sh
# Some Info.plist files are in a JSON-like format. This will convert them to XML.
cd "$(dirname "$0")" || exit

if [ $# = 0 ]; then
    echo 'Missing uid(s) parameter'
    exit 0
fi

for uid in "$@"; do
    fname=data/$((uid/1000))/$uid.plist
    if [ -f "../$fname" ]; then
        if res=$(/usr/libexec/PlistBuddy -x -c print "../$fname"); then
            echo "overwrite $fname"
            echo "$res" > "../$fname"
        fi
    else
        echo "does not exist: $fname"
    fi
done

diff --git a/tools/image_optim.sh b/tools/image_optim.sh
new file mode 100755
index 00000000..6dc289eb
--- /dev/null
+++ b/tools/image_optim.sh
@@ -0,0 +1,52 @@
#!/bin/bash
# Convert images from .png to .jpg + reduce resolution + shrink filesize.

MAX_SIZE=128
BATCH_SIZE=50
tmp=()

cd "$(dirname "$0")" || exit


imageOptim() {
    open --new --wait-apps --background -b net.pornel.ImageOptim --args "$@"
}

optimize() {
    if [ "${#tmp[@]}" -ge "$1" ]; then
        echo "imageOptim on ${#tmp[@]} files"
        imageOptim "${tmp[@]}"
        tmp=()
    fi
}

downscale() {
    IN_FILE=$1
    OUT_FILE=${IN_FILE%.png}.jpg
    w=$(sips -g pixelWidth "$IN_FILE" | cut -d: -f2 | tail -1)
    if [ "$w" -gt $MAX_SIZE ]; then w=$MAX_SIZE; fi
    sips -Z "$w" "$IN_FILE" -s format jpeg -o "$OUT_FILE" 1> /dev/null
    tmp+=("$PWD/$OUT_FILE")
    optimize $BATCH_SIZE
}

# using glob is fine because filenames do not contain spaces
total=$(echo ../data/*/*.png | wc -w)
total=${total##* }
if [ "$total" -lt 2 ]; then
    if [ "$(echo ../data/*/*.png)" = '../data/*/*.png' ]; then
        echo "Nothing to do."
        exit 0
    fi
fi

i=0
for file in ../data/*/*.png; do
    i=$((i+1))
    echo "[$i/$total] sips $file"
    downscale "$file"
    if [ -f "${file%.png}.jpg" ]; then
        rm "$file"
    fi
done

optimize 1

diff --git a/tools/plist_server.py b/tools/plist_server.py
new file mode 100755
index 00000000..506f74a7
--- /dev/null
+++ b/tools/plist_server.py
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
from http.server import BaseHTTPRequestHandler, HTTPServer
from base64 import b64decode
import socket
import json


def generatePlist(data: dict) -> str:
    return f'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0"><dict>
<key>items</key><array><dict>
<key>assets</key><array>
<dict>
<key>kind</key><string>software-package</string>
<key>url</key><string>{data.get('u')}</string>
</dict>
<dict>
<key>kind</key><string>display-image</string>
<key>needs-shine</key><false/>
<key>url</key><string>{data.get('i')}</string>
</dict>
</array>
<key>metadata</key><dict>
<key>bundle-identifier</key><string>{data.get('b')}</string>
<key>bundle-version</key><string>{data.get('v')}</string>
<key>kind</key><string>software</string>
<key>title</key><string>{data.get('n')}</string>
</dict>
</dict></array>
</dict></plist>'''  # noqa: E501


class PlistServer(BaseHTTPRequestHandler):
    def do_GET(self):
        try:
            b64 = self.path.split('?d=')[-1] + '=='
            print(b64decode(b64))
            data = json.loads(b64decode(b64))  # type: dict
            rv = generatePlist(data)
        except Exception as e:
            print(e)
            rv = ''
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')
        if rv:
            self.send_header('Content-type', 'application/xml')
        self.end_headers()
        self.wfile.write(bytes(rv, 'utf-8') if rv else b'Parsing error')


def getLocalIp():
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.connect(('10.255.255.255', 80))
    ip = s.getsockname()[0]
    s.close()
    return ip


if __name__ == '__main__':
    webServer = HTTPServer(('0.0.0.0', 8026), PlistServer)
    print('Server started http://%s:%s' % (getLocalIp(), 8026))
    try:
        webServer.serve_forever()
    except KeyboardInterrupt:
        pass
    webServer.server_close()

diff --git a/tools/plist_server/index.php b/tools/plist_server/index.php
new file mode 100644
index 00000000..f1df1fc9
--- /dev/null
+++ b/tools/plist_server/index.php
@@ -0,0 +1,24 @@
<?php
$X = json_decode(base64_decode($_GET['d']));
if ($X && $X->u) {
    header('Content-Type: application/xml');
    echo '<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0"><dict>
<key>items</key><array><dict>
<key>assets</key><array>
<dict>
<key>kind</key><string>software-package</string>
<key>url</key><string>'.$X->u.'</string>
</dict>
<dict>
<key>kind</key><string>display-image</string>
<key>needs-shine</key><false/>
<key>url</key><string>'.$X->i.'</string>
</dict>
</array>
<key>metadata</key><dict>
<key>bundle-identifier</key><string>'.$X->b.'</string>
<key>bundle-version</key><string>'.$X->v.'</string>
<key>kind</key><string>software</string>
<key>title</key><string>'.$X->n.'</string>
</dict>
</dict></array>
</dict></plist>';
} else {
    echo 'Parsing error.';
}
?>
\ No newline at end of file
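A quick way to sanity-check the manifest that either server produces (mirroring the `btoa('{"u":"1"}')` probe that `setPlistGen()` in script.js sends) is to lint the response with macOS's `plutil`; the host, port, and payload values below are placeholders:

```sh
D=$(printf '%s' '{"u":"https://example.org/a.ipa","b":"com.example.app","v":"1.0","n":"Example"}' | base64 | tr -d '\n')
curl -s "http://localhost:8026/?d=$D" | plutil -lint -
# prints "OK" when the generated plist is well-formed XML
```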