refactor: artifact pruning

This commit is contained in:
relikd
2022-12-08 00:32:37 +01:00
parent 7d668892a6
commit 491c06e22f
4 changed files with 67 additions and 33 deletions

View File

@@ -86,6 +86,7 @@ class VGroups:
deps = set() # type: Set[str] deps = set() # type: Set[str]
for vobj in done_list: for vobj in done_list:
deps.update(vobj.config.dependencies) deps.update(vobj.config.dependencies)
# ctx.record_virtual_dependency(vobj) # TODO: needed? works without
for dep in deps: for dep in deps:
ctx.record_dependency(dep) ctx.record_dependency(dep)

View File

@@ -5,7 +5,7 @@ from .backref import GroupByRef, VGroups
from .groupby import GroupBy from .groupby import GroupBy
from .pruner import prune from .pruner import prune
from .resolver import Resolver from .resolver import Resolver
from .vobj import VPATH, GroupBySource, GroupByBuildProgram from .vobj import GroupBySource, GroupByBuildProgram
if TYPE_CHECKING: if TYPE_CHECKING:
from lektor.builder import Builder from lektor.builder import Builder
from lektor.sourceobj import SourceObject from lektor.sourceobj import SourceObject
@@ -39,7 +39,7 @@ class GroupByPlugin(Plugin):
def on_after_prune(self, builder: 'Builder', **extra: Any) -> None: def on_after_prune(self, builder: 'Builder', **extra: Any) -> None:
# TODO: find a better way to prune unreferenced elements # TODO: find a better way to prune unreferenced elements
prune(builder, VPATH, self.resolver.files) prune(builder, self.resolver.files)
# ------------ # ------------
# internal # internal

View File

@@ -1,45 +1,77 @@
''' '''
Static collector for build-artifact urls. Usage:
All non-tracked VPATH-urls will be pruned after build. VirtualSourceObject.produce_artifacts()
-> remember url and later supply as `current_urls`
VirtualSourceObject.build_artifact()
-> `get_ctx().record_virtual_dependency(VirtualPruner())`
''' '''
from lektor.reporter import reporter # report_pruned_artifact from lektor.reporter import reporter # report_pruned_artifact
from lektor.sourceobj import VirtualSourceObject # subclass
from lektor.utils import prune_file_and_folder from lektor.utils import prune_file_and_folder
from typing import TYPE_CHECKING, Set, Iterable import os
from typing import TYPE_CHECKING, Set, List, Iterable
if TYPE_CHECKING: if TYPE_CHECKING:
from lektor.builder import Builder from lektor.builder import Builder
from sqlite3 import Connection
class VirtualPruner(VirtualSourceObject):
    '''
    Marker source object: artifacts depending on it support pruning.

    Build programs record an instance as virtual dependency while building
    an artifact. The artifacts table is later queried for `VPATH` to find
    all artifacts that are candidates for pruning.
    '''
    VPATH = '/@VirtualPruner'

    def __init__(self) -> None:
        # One fixed path for all instances; add a suffix variable if needed.
        self._path = VirtualPruner.VPATH

    @property
    def path(self) -> str:  # type: ignore[override]
        ''' The virtual path which was assigned on initialization. '''
        return self._path
def prune(builder: 'Builder', current_urls: Iterable[str]) -> None:
    '''
    Delete artifacts of earlier builds which are not referenced anymore.

    Every artifact stored with the VirtualPruner dependency but missing in
    `current_urls` is removed from the destination folder and, afterwards,
    its reference is removed from the build database.
    '''
    output_root = builder.destination_path
    db = builder.connect_to_database()
    try:
        known = _query_prunable(db)
        keep = _normalize_urls(current_urls)
        obsolete = known.difference(keep)
        for artifact in obsolete:
            reporter.report_pruned_artifact(artifact)  # type: ignore
            # artifact names use "/", convert to a path below destination
            rel_path = artifact.strip('/').replace('/', os.path.sep)
            target = os.path.join(output_root, rel_path)
            prune_file_and_folder(target, output_root)
        # only reached if nothing above raised: drop stale db references
        _prune_db_artifacts(db, list(obsolete))
    finally:
        db.close()
# ---------------------------
# Internal helper methods
# ---------------------------
def _normalize_urls(urls: Iterable[str]) -> Set[str]:
    '''
    Bring urls into the form they are compared in against artifact names.

    A trailing "/" is expanded to "/index.html" and all leading slashes are
    removed, e.g., "/blog/" becomes "blog/index.html". Duplicates collapse
    because the result is a set.
    '''
    return {
        (url + 'index.html' if url.endswith('/') else url).lstrip('/')
        for url in urls
    }
def _query_prunable(conn: 'Connection') -> Set[str]:
    '''
    Query database for artifacts that have the VirtualPruner dependency.

    Returns the `artifact` column of all rows in the `artifacts` table whose
    `source` equals `VirtualPruner.VPATH`.
    '''
    cur = conn.cursor()
    cur.execute('SELECT artifact FROM artifacts WHERE source = ?',
                [VirtualPruner.VPATH])
    # each row is a 1-tuple, unpack it to the plain artifact name
    return {artifact for artifact, in cur.fetchall()}
vpath = '@' + vpath.lstrip('@') # just in case of user error
dest_path = builder.destination_path
def _prune_db_artifacts(conn: 'Connection', urls: List[str]) -> None:
    '''
    Remove obsolete artifact references from database.

    All rows of the `artifacts` table whose `artifact` column equals one of
    `urls` are deleted. The DELETE statements are issued in batches to stay
    below the number of bound variables SQLite accepts per statement, and
    are committed together once all batches were executed.
    '''
    if not urls:
        return  # nothing to delete, leave the connection untouched
    MAX_VARS = 999  # Default SQLITE_MAX_VARIABLE_NUMBER.
    cur = conn.cursor()
    for start in range(0, len(urls), MAX_VARS):
        batch = urls[start:start + MAX_VARS]
        # only "?" placeholders are formatted in, values are bound by sqlite
        placeholders = ','.join(['?'] * len(batch))
        cur.execute(
            'DELETE FROM artifacts WHERE artifact in ({})'.format(
                placeholders), batch)
    # single commit: either all obsolete references are dropped or none
    conn.commit()
if vpath not in artifact_name:
continue # we only care about our Virtuals
reporter.report_pruned_artifact(url) # type: ignore
prune_file_and_folder(file.filename, dest_path)
build_state.remove_artifact(url)
break # there is only one VPATH-entry per source
finally:
con.close()

View File

@@ -7,6 +7,7 @@ from typing import (
TYPE_CHECKING, List, Any, Dict, Optional, Generator, Iterator, Iterable TYPE_CHECKING, List, Any, Dict, Optional, Generator, Iterator, Iterable
) )
from .pagination import PaginationConfig from .pagination import PaginationConfig
from .pruner import VirtualPruner
from .query import FixedRecordsQuery from .query import FixedRecordsQuery
from .util import most_used_key, insert_before_ext, build_url, cached_property from .util import most_used_key, insert_before_ext, build_url, cached_property
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -250,7 +251,7 @@ class GroupByBuildProgram(BuildProgram):
self.source.iter_source_filenames())) self.source.iter_source_filenames()))
def build_artifact(self, artifact: 'Artifact') -> None: def build_artifact(self, artifact: 'Artifact') -> None:
get_ctx().record_virtual_dependency(self.source) get_ctx().record_virtual_dependency(VirtualPruner())
artifact.render_template_into( artifact.render_template_into(
self.source.config.template, this=self.source) self.source.config.template, this=self.source)