refactor: artifact pruning

This commit is contained in:
relikd
2022-12-08 00:32:37 +01:00
parent 7d668892a6
commit 491c06e22f
4 changed files with 67 additions and 33 deletions

View File

@@ -86,6 +86,7 @@ class VGroups:
deps = set() # type: Set[str]
for vobj in done_list:
deps.update(vobj.config.dependencies)
# ctx.record_virtual_dependency(vobj) # TODO: needed? works without
for dep in deps:
ctx.record_dependency(dep)

View File

@@ -5,7 +5,7 @@ from .backref import GroupByRef, VGroups
from .groupby import GroupBy
from .pruner import prune
from .resolver import Resolver
from .vobj import VPATH, GroupBySource, GroupByBuildProgram
from .vobj import GroupBySource, GroupByBuildProgram
if TYPE_CHECKING:
from lektor.builder import Builder
from lektor.sourceobj import SourceObject
@@ -39,7 +39,7 @@ class GroupByPlugin(Plugin):
def on_after_prune(self, builder: 'Builder', **extra: Any) -> None:
# TODO: find a better way to prune unreferenced elements
prune(builder, VPATH, self.resolver.files)
prune(builder, self.resolver.files)
# ------------
# internal

View File

@@ -1,45 +1,77 @@
'''
Static collector for build-artifact urls.
All non-tracked VPATH-urls will be pruned after build.
Usage:
VirtualSourceObject.produce_artifacts()
-> remember url and later supply as `current_urls`
VirtualSourceObject.build_artifact()
-> `get_ctx().record_virtual_dependency(VirtualPruner())`
'''
from lektor.reporter import reporter # report_pruned_artifact
from lektor.sourceobj import VirtualSourceObject # subclass
from lektor.utils import prune_file_and_folder
from typing import TYPE_CHECKING, Set, Iterable
import os
from typing import TYPE_CHECKING, Set, List, Iterable
if TYPE_CHECKING:
from lektor.builder import Builder
from sqlite3 import Connection
def _normalize_url_cache(url_cache: Iterable[str]) -> Set[str]:
class VirtualPruner(VirtualSourceObject):
    '''
    Marker source: flags a generated artifact as eligible for pruning.
    All instances share the same path (`VPATH`), which is also the value
    `_query_prunable` searches for in the `source` column of the database.
    '''

    VPATH = '/@VirtualPruner'

    def __init__(self) -> None:
        # NOTE(review): the base-class initializer is not invoked here;
        # presumably because there is no record to attach to -- confirm.
        self._path = VirtualPruner.VPATH  # if needed, add suffix variable

    @property
    def path(self) -> str:  # type: ignore[override]
        ''' The fixed virtual path of this marker object. '''
        return self._path
def prune(builder: 'Builder', current_urls: Iterable[str]) -> None:
    '''
    Removes previously generated, but now unreferenced Artifacts.

    Artifacts returned by `_query_prunable` whose url is not part of
    `current_urls` (after normalization) are reported, handed to
    `prune_file_and_folder` and finally deleted from the database.
    '''
    output_root = builder.destination_path
    db = builder.connect_to_database()
    try:
        known = _query_prunable(db)
        alive = _normalize_urls(current_urls)
        obsolete = known - alive
        for url in obsolete:
            reporter.report_pruned_artifact(url)  # type: ignore
            # artifact urls use "/", the file system may not
            relative = url.strip('/').replace('/', os.path.sep)
            prune_file_and_folder(
                os.path.join(output_root, relative), output_root)
        # only reached if nothing above raised: drop the stale db references
        _prune_db_artifacts(db, list(obsolete))
    finally:
        db.close()
# ---------------------------
# Internal helper methods
# ---------------------------
def _normalize_urls(urls: Iterable[str]) -> Set[str]:
cache = set()
for url in url_cache:
for url in urls:
if url.endswith('/'):
url += 'index.html'
cache.add(url.lstrip('/'))
return cache
def prune(builder: 'Builder', vpath: str, url_cache: Iterable[str]) -> None:
'''
Remove previously generated, unreferenced Artifacts.
An artifact is pruned if its url is not contained in url_cache and at least
one of its dependency names contains vpath.
url_cache is passed through _normalize_url_cache before the comparison, so
entries are matched without leading slash, "blog/index.html" instead of
"/blog/index.html".
'''
vpath = '@' + vpath.lstrip('@') # just in case of user error
dest_path = builder.destination_path
url_cache = _normalize_url_cache(url_cache)
# NOTE(review): `con` is only closed in the finally-clause below and is not
# otherwise used inside this function.
con = builder.connect_to_database()
try:
with builder.new_build_state() as build_state:
# walk over every artifact known to the build state
for url, file in build_state.iter_artifacts():
if url.lstrip('/') in url_cache:
continue # generated in this build-run
# second argument is an empty list of sources
infos = build_state.get_artifact_dependency_infos(url, [])
for artifact_name, _ in infos:
if vpath not in artifact_name:
continue # we only care about our Virtuals
# first matching dependency: report, delete file, forget artifact
reporter.report_pruned_artifact(url) # type: ignore
prune_file_and_folder(file.filename, dest_path)
build_state.remove_artifact(url)
break # there is only one VPATH-entry per source
finally:
con.close()
def _query_prunable(conn: 'Connection') -> Set[str]:
    ''' Collect all artifacts stored with the VirtualPruner path as source. '''
    rows = conn.cursor().execute(
        'SELECT artifact FROM artifacts WHERE source = ?',
        (VirtualPruner.VPATH,)).fetchall()
    # each row is a 1-tuple holding the artifact name
    return {artifact for (artifact,) in rows}
def _prune_db_artifacts(conn: 'Connection', urls: List[str]) -> None:
''' Remove obsolete artifact references from database. '''
MAX_VARS = 999 # Default SQLITE_MAX_VARIABLE_NUMBER.
cur = conn.cursor()
for i in range(0, len(urls), MAX_VARS):
batch = urls[i: i + MAX_VARS]
cur.execute('DELETE FROM artifacts WHERE artifact in ({})'.format(
','.join(['?'] * len(batch))), batch)
conn.commit()

View File

@@ -7,6 +7,7 @@ from typing import (
TYPE_CHECKING, List, Any, Dict, Optional, Generator, Iterator, Iterable
)
from .pagination import PaginationConfig
from .pruner import VirtualPruner
from .query import FixedRecordsQuery
from .util import most_used_key, insert_before_ext, build_url, cached_property
if TYPE_CHECKING:
@@ -250,7 +251,7 @@ class GroupByBuildProgram(BuildProgram):
self.source.iter_source_filenames()))
def build_artifact(self, artifact: 'Artifact') -> None:
get_ctx().record_virtual_dependency(self.source)
get_ctx().record_virtual_dependency(VirtualPruner())
artifact.render_template_into(
self.source.config.template, this=self.source)