diff --git a/lektor_groupby/backref.py b/lektor_groupby/backref.py index 5c1b9d1..78eee0d 100644 --- a/lektor_groupby/backref.py +++ b/lektor_groupby/backref.py @@ -86,6 +86,7 @@ class VGroups: deps = set() # type: Set[str] for vobj in done_list: deps.update(vobj.config.dependencies) + # ctx.record_virtual_dependency(vobj) # TODO: needed? works without for dep in deps: ctx.record_dependency(dep) diff --git a/lektor_groupby/plugin.py b/lektor_groupby/plugin.py index 6ee93b3..21e1b94 100644 --- a/lektor_groupby/plugin.py +++ b/lektor_groupby/plugin.py @@ -5,7 +5,7 @@ from .backref import GroupByRef, VGroups from .groupby import GroupBy from .pruner import prune from .resolver import Resolver -from .vobj import VPATH, GroupBySource, GroupByBuildProgram +from .vobj import GroupBySource, GroupByBuildProgram if TYPE_CHECKING: from lektor.builder import Builder from lektor.sourceobj import SourceObject @@ -39,7 +39,7 @@ class GroupByPlugin(Plugin): def on_after_prune(self, builder: 'Builder', **extra: Any) -> None: # TODO: find a better way to prune unreferenced elements - prune(builder, VPATH, self.resolver.files) + prune(builder, self.resolver.files) # ------------ # internal diff --git a/lektor_groupby/pruner.py b/lektor_groupby/pruner.py index 6eef54f..fcada4f 100644 --- a/lektor_groupby/pruner.py +++ b/lektor_groupby/pruner.py @@ -1,45 +1,77 @@ ''' -Static collector for build-artifact urls. -All non-tracked VPATH-urls will be pruned after build. +Usage: + VirtualSourceObject.produce_artifacts() + -> remember url and later supply as `current_urls` + VirtualSourceObject.build_artifact() + -> `get_ctx().record_virtual_dependency(VirtualPruner())` ''' from lektor.reporter import reporter # report_pruned_artifact +from lektor.sourceobj import VirtualSourceObject # subclass from lektor.utils import prune_file_and_folder -from typing import TYPE_CHECKING, Set, Iterable +import os +from typing import TYPE_CHECKING, Set, List, Iterable if TYPE_CHECKING: from lektor.builder import Builder + from sqlite3 import Connection -def _normalize_url_cache(url_cache: Iterable[str]) -> Set[str]: +class VirtualPruner(VirtualSourceObject): + ''' Indicate that a generated VirtualSourceObject has pruning support. ''' + VPATH = '/@VirtualPruner' + + def __init__(self) -> None: + self._path = VirtualPruner.VPATH # if needed, add suffix variable + + @property + def path(self) -> str: # type: ignore[override] + return self._path + + +def prune(builder: 'Builder', current_urls: Iterable[str]) -> None: + ''' Removes previously generated, but now unreferenced Artifacts. ''' + dest_dir = builder.destination_path + con = builder.connect_to_database() + try: + previous = _query_prunable(con) + current = _normalize_urls(current_urls) + to_be_pruned = previous.difference(current) + for file in to_be_pruned: + reporter.report_pruned_artifact(file) # type: ignore + prune_file_and_folder(os.path.join( + dest_dir, file.strip('/').replace('/', os.path.sep)), dest_dir) + # if no exception raised, update db to remove obsolete references + _prune_db_artifacts(con, list(to_be_pruned)) + finally: + con.close() + + +# --------------------------- +# Internal helper methods +# --------------------------- + +def _normalize_urls(urls: Iterable[str]) -> Set[str]: cache = set() - for url in url_cache: + for url in urls: if url.endswith('/'): url += 'index.html' cache.add(url.lstrip('/')) return cache -def prune(builder: 'Builder', vpath: str, url_cache: Iterable[str]) -> None: - ''' - Remove previously generated, unreferenced Artifacts. - All urls in url_cache must have a trailing "/index.html" (instead of "/") - and also, no leading slash, "blog/index.html" instead of "/blog/index.html" - ''' - vpath = '@' + vpath.lstrip('@') # just in case of user error - dest_path = builder.destination_path - url_cache = _normalize_url_cache(url_cache) - con = builder.connect_to_database() - try: - with builder.new_build_state() as build_state: - for url, file in build_state.iter_artifacts(): - if url.lstrip('/') in url_cache: - continue # generated in this build-run - infos = build_state.get_artifact_dependency_infos(url, []) - for artifact_name, _ in infos: - if vpath not in artifact_name: - continue # we only care about our Virtuals - reporter.report_pruned_artifact(url) # type: ignore - prune_file_and_folder(file.filename, dest_path) - build_state.remove_artifact(url) - break # there is only one VPATH-entry per source - finally: - con.close() +def _query_prunable(conn: 'Connection') -> Set[str]: + ''' Query database for artifacts that have the VirtualPruner dependency ''' + cur = conn.cursor() + cur.execute('SELECT artifact FROM artifacts WHERE source = ?', + [VirtualPruner.VPATH]) + return set(x for x, in cur.fetchall()) + + +def _prune_db_artifacts(conn: 'Connection', urls: List[str]) -> None: + ''' Remove obsolete artifact references from database. ''' + MAX_VARS = 999 # Default SQLITE_MAX_VARIABLE_NUMBER. + cur = conn.cursor() + for i in range(0, len(urls), MAX_VARS): + batch = urls[i: i + MAX_VARS] + cur.execute('DELETE FROM artifacts WHERE artifact in ({})'.format( + ','.join(['?'] * len(batch))), batch) + conn.commit() diff --git a/lektor_groupby/vobj.py b/lektor_groupby/vobj.py index 23c510b..33fd441 100644 --- a/lektor_groupby/vobj.py +++ b/lektor_groupby/vobj.py @@ -7,6 +7,7 @@ from typing import ( TYPE_CHECKING, List, Any, Dict, Optional, Generator, Iterator, Iterable ) from .pagination import PaginationConfig +from .pruner import VirtualPruner from .query import FixedRecordsQuery from .util import most_used_key, insert_before_ext, build_url, cached_property if TYPE_CHECKING: @@ -250,7 +251,7 @@ class GroupByBuildProgram(BuildProgram): self.source.iter_source_filenames())) def build_artifact(self, artifact: 'Artifact') -> None: - get_ctx().record_virtual_dependency(self.source) + get_ctx().record_virtual_dependency(VirtualPruner()) artifact.render_template_into( self.source.config.template, this=self.source)