From 1d9629566c2faa3cf00d8e2fb2ee817aa6e88167 Mon Sep 17 00:00:00 2001
From: relikd <info@relikd.de>
Date: Wed, 13 Apr 2022 15:41:57 +0200
Subject: [PATCH] efficient build

- postpone building until really needed
- rebuild only if artifacts change
- no build on source update
- prune takes current resolver state instead of global var
---
 lektor_groupby/backref.py  |  59 +++++++++++++++++
 lektor_groupby/groupby.py  |  57 +++++++++--------
 lektor_groupby/model.py    |  66 +++++++++++++++++++
 lektor_groupby/plugin.py   |  88 ++++++++++++++-----------
 lektor_groupby/pruner.py   |  30 +++++----
 lektor_groupby/resolver.py |  70 +++++++++++---------
 lektor_groupby/vobj.py     |  75 +++++-----------------
 lektor_groupby/watcher.py  | 127 +++++++------------------------------
 8 files changed, 302 insertions(+), 270 deletions(-)
 create mode 100644 lektor_groupby/backref.py
 create mode 100644 lektor_groupby/model.py

diff --git a/lektor_groupby/backref.py b/lektor_groupby/backref.py
new file mode 100644
index 0000000..892f95f
--- /dev/null
+++ b/lektor_groupby/backref.py
@@ -0,0 +1,59 @@
+from lektor.context import get_ctx
+from typing import TYPE_CHECKING, Iterator
+from weakref import WeakSet
+if TYPE_CHECKING:
+    from lektor.builder import Builder
+    from lektor.db import Record
+    from .groupby import GroupBy
+    from .vobj import GroupBySource
+
+
+class GroupByRef:  # attr is name-mangled to builder._GroupByRef__groupby
+    @staticmethod
+    def of(builder: 'Builder') -> 'GroupBy':
+        ''' Get the GroupBy of a builder. Raises AttributeError if unset. '''
+        return builder.__groupby  # type:ignore[attr-defined,no-any-return]
+
+    @staticmethod
+    def set(builder: 'Builder', groupby: 'GroupBy') -> None:
+        ''' Attach a GroupBy to a builder (overwrites a previous one). '''
+        builder.__groupby = groupby  # type: ignore[attr-defined]
+
+
+class VGroups:
+    @staticmethod
+    def of(record: 'Record') -> WeakSet:
+        '''
+        Return the weak set of groupby virtual objects stored on a record.
+        An empty WeakSet is created and attached on first access.
+        '''
+        try:
+            wset = record.__vgroups  # type: ignore[attr-defined]
+        except AttributeError:
+            wset = WeakSet()
+            record.__vgroups = wset  # type: ignore[attr-defined]
+        return wset  # type: ignore[no-any-return]
+
+    @staticmethod
+    def iter(record: 'Record', *keys: str, recursive: bool = False) \
+            -> Iterator['GroupBySource']:
+        ''' Yield groupby virtual objects of a page (filtered by keys). '''
+        ctx = get_ctx()
+        if not ctx:
+            raise NotImplementedError("Shouldn't happen, where is my context?")
+        # GroupBy object was attached to the builder by the plugin
+        builder = ctx.build_state.builder
+        groupby = GroupByRef.of(builder)
+        groupby.make_once(builder)  # cluster now, if not done yet
+        # register config dependencies with the current build context
+        for dep in groupby.dependencies:
+            ctx.record_dependency(dep)
+        # breadth-first: the page itself, then (if recursive) its descendants
+        proc_list = [record]
+        while proc_list:
+            page = proc_list.pop(0)
+            if recursive and hasattr(page, 'children'):
+                proc_list.extend(page.children)  # type: ignore[attr-defined]
+            for vobj in VGroups.of(page):
+                if not keys or vobj.config.key in keys:  # no keys = no filter
+                    yield vobj
diff --git a/lektor_groupby/groupby.py b/lektor_groupby/groupby.py
index abf918a..8d01bb7 100644
--- a/lektor_groupby/groupby.py
+++ b/lektor_groupby/groupby.py
@@ -1,12 +1,14 @@
-from lektor.builder import Builder, PathCache
-from lektor.db import Record  # typing
-from lektor.sourceobj import SourceObject  # typing
-
-from typing import Set, List
-from .vobj import GroupBySource  # typing
-from .config import Config, AnyConfig
-from .resolver import Resolver  # typing
+from lektor.builder import PathCache
+from lektor.db import Record  # isinstance
+from typing import TYPE_CHECKING, Set, List
+from .config import Config
 from .watcher import Watcher
+if TYPE_CHECKING:
+    from .config import AnyConfig
+    from lektor.builder import Builder
+    from lektor.sourceobj import SourceObject
+    from .resolver import Resolver
+    from .vobj import GroupBySource
 
 
 class GroupBy:
@@ -16,11 +18,12 @@ class GroupBy:
     The grouping is performed only once per build.
     '''
 
-    def __init__(self) -> None:
+    def __init__(self, resolver: 'Resolver') -> None:
         self._watcher = []  # type: List[Watcher]
         self._results = []  # type: List[GroupBySource]
+        self.resolver = resolver
 
-    def add_watcher(self, key: str, config: AnyConfig) -> Watcher:
+    def add_watcher(self, key: str, config: 'AnyConfig') -> Watcher:
         ''' Init Config and add to watch list. '''
         w = Watcher(Config.from_any(key, config))
         self._watcher.append(w)
@@ -32,8 +35,9 @@ class GroupBy:
             deps.update(w.config.dependencies)
         return deps
 
-    def queue_all(self, builder: Builder) -> None:
+    def queue_all(self, builder: 'Builder') -> None:
         ''' Iterate full site-tree and queue all children. '''
+        self.dependencies = self.get_dependencies()
         # remove disabled watchers
         self._watcher = [w for w in self._watcher if w.config.enabled]
         if not self._watcher:
@@ -45,30 +49,29 @@ class GroupBy:
         queue = builder.pad.get_all_roots()  # type: List[SourceObject]
         while queue:
             record = queue.pop()
-            self.queue_now(record)
             if hasattr(record, 'attachments'):
                 queue.extend(record.attachments)  # type: ignore[attr-defined]
             if hasattr(record, 'children'):
                 queue.extend(record.children)  # type: ignore[attr-defined]
+            if isinstance(record, Record):
+                for w in self._watcher:
+                    if w.should_process(record):
+                        w.process(record)
 
-    def queue_now(self, node: SourceObject) -> None:
-        ''' Process record immediatelly (No-Op if already processed). '''
-        if isinstance(node, Record):
-            for w in self._watcher:
-                if w.should_process(node):
-                    w.process(node)
-
-    def make_cluster(self, builder: Builder, resolver: Resolver) -> None:
+    def make_once(self, builder: 'Builder') -> None:
         ''' Perform groupby, iter over sources with watcher callback. '''
-        for w in self._watcher:
-            root = builder.pad.get(w.config.root)
-            for vobj in w.iter_sources(root):
-                self._results.append(vobj)
-                resolver.add(vobj)
-        self._watcher.clear()
+        if self._watcher:
+            self.resolver.reset()
+            for w in self._watcher:
+                root = builder.pad.get(w.config.root)
+                for vobj in w.iter_sources(root):
+                    self._results.append(vobj)
+                    self.resolver.add(vobj)
+            self._watcher.clear()
 
-    def build_all(self, builder: Builder) -> None:
+    def build_all(self, builder: 'Builder') -> None:
         ''' Create virtual objects and build sources. '''
+        self.make_once(builder)  # in case no page used the |vgroups filter
         path_cache = PathCache(builder.env)
         for vobj in self._results:
             if vobj.slug:
diff --git a/lektor_groupby/model.py b/lektor_groupby/model.py
new file mode 100644
index 0000000..1d45f06
--- /dev/null
+++ b/lektor_groupby/model.py
@@ -0,0 +1,66 @@
+from lektor.db import Database, Record  # typing
+from lektor.types.flow import Flow, FlowType
+from lektor.utils import bool_from_string
+from typing import Set, Dict, Tuple, Any, NamedTuple, Optional, Iterator
+
+
+class FieldKeyPath(NamedTuple):
+    fieldKey: str  # name of the record field
+    flowIndex: Optional[int] = None  # index of the flow-block (flows only)
+    flowKey: Optional[str] = None  # field name inside the flow-block
+
+
+class ModelReader:
+    '''
+    Find models and flow-models which have a field with option attr set.
+    With flatten=False, a flagged Flow field is yielded as a whole;
+    with flatten=True, the fields of each of its flow-blocks are yielded.
+    '''
+
+    def __init__(self, db: Database, attr: str, flatten: bool = False) -> None:
+        self.flatten = flatten
+        self._flows = {}  # type: Dict[str, Set[str]]
+        self._models = {}  # type: Dict[str, Dict[str, str]]
+        # per flow-block: names of the fields which have attr enabled
+        for key, flow in db.flowblocks.items():
+            tmp1 = set(f.name for f in flow.fields
+                       if bool_from_string(f.options.get(attr, False)))
+            if tmp1:
+                self._flows[key] = tmp1
+        # per model: fields with attr ('*') or flows whose blocks may ('?')
+        for key, model in db.datamodels.items():
+            tmp2 = {}  # type: Dict[str, str]
+            for field in model.fields:
+                if bool_from_string(field.options.get(attr, False)):
+                    tmp2[field.name] = '*'  # include all children
+                elif isinstance(field.type, FlowType) and self._flows:
+                    # only processed if at least one flow has attr
+                    fbs = field.type.flow_blocks
+                    # if fbs == None, all flow-blocks are allowed
+                    if fbs is None or any(x in self._flows for x in fbs):
+                        tmp2[field.name] = '?'  # only some flow blocks
+            if tmp2:
+                self._models[key] = tmp2
+
+    def read(self, record: Record) -> Iterator[Tuple[FieldKeyPath, Any]]:
+        ''' Yield (FieldKeyPath, value) of all record fields with attr set. '''
+        assert isinstance(record, Record)
+        for r_key, subs in self._models.get(record.datamodel.id, {}).items():
+            field = record[r_key]
+            if not field:  # skip empty values
+                continue
+            if subs == '*':  # either normal field or flow type (all blocks)
+                if self.flatten and isinstance(field, Flow):
+                    for i, flow in enumerate(field.blocks):
+                        flowtype = flow['_flowblock']  # NOTE(review): unused
+                        for f_key, block in flow._data.items():
+                            if f_key.startswith('_'):  # e.g., _flowblock
+                                continue
+                            yield FieldKeyPath(r_key, i, f_key), block
+                else:
+                    yield FieldKeyPath(r_key), field
+            else:  # always flow type (only some blocks)
+                for i, flow in enumerate(field.blocks):
+                    flowtype = flow['_flowblock']
+                    for f_key in self._flows.get(flowtype, []):
+                        yield FieldKeyPath(r_key, i, f_key), flow[f_key]
diff --git a/lektor_groupby/plugin.py b/lektor_groupby/plugin.py
index da6213d..659e6ce 100644
--- a/lektor_groupby/plugin.py
+++ b/lektor_groupby/plugin.py
@@ -1,14 +1,15 @@
-from lektor.builder import Builder  # typing
-from lektor.db import Page  # typing
+from lektor.db import Page  # isinstance
 from lektor.pluginsystem import Plugin  # subclass
-from lektor.sourceobj import SourceObject  # typing
-
-from typing import Iterator, Any
-from .vobj import GroupBySource, GroupByBuildProgram, VPATH, VGroups
+from typing import TYPE_CHECKING, Iterator, Any
+from .backref import GroupByRef, VGroups
 from .groupby import GroupBy
 from .pruner import prune
 from .resolver import Resolver
-from .watcher import GroupByCallbackArgs  # typing
+from .vobj import VPATH, GroupBySource, GroupByBuildProgram
+if TYPE_CHECKING:
+    from lektor.builder import Builder, BuildState
+    from lektor.sourceobj import SourceObject
+    from .watcher import GroupByCallbackArgs
 
 
 class GroupByPlugin(Plugin):
@@ -16,10 +17,51 @@ class GroupByPlugin(Plugin):
     description = 'Cluster arbitrary records with field attribute keyword.'
 
     def on_setup_env(self, **extra: Any) -> None:
+        self.has_changes = False
         self.resolver = Resolver(self.env)
         self.env.add_build_program(GroupBySource, GroupByBuildProgram)
         self.env.jinja_env.filters.update(vgroups=VGroups.iter)
 
+    def on_before_build(self, builder: 'Builder', source: 'SourceObject',
+                        **extra: Any) -> None:
+        # Lektor may emit before-build ahead of before-build-all (issue
+        # #1017), so the one-time setup is triggered from here as well.
+        if not isinstance(source, Page):
+            return
+        self._init_once(builder)
+
+    def on_after_build(self, build_state: 'BuildState', **extra: Any) -> None:
+        if build_state.updated_artifacts:
+            self.has_changes = True  # consumed by on_after_build_all
+
+    def on_after_build_all(self, builder: 'Builder', **extra: Any) -> None:
+        # skip idle builds; the first run after startup must fill the resolver
+        if not self.has_changes and self.resolver.has_any:
+            return
+        self._init_once(builder).build_all(builder)  # updates resolver
+        self.has_changes = False
+
+    def on_after_prune(self, builder: 'Builder', **extra: Any) -> None:
+        # TODO: find a better way to prune unreferenced elements
+        prune(builder, VPATH, self.resolver.files)  # keeps resolver urls
+
+    # ------------
+    #   internal
+    # ------------
+
+    def _init_once(self, builder: 'Builder') -> GroupBy:
+        try:
+            return GroupByRef.of(builder)  # already set up for this builder
+        except AttributeError:
+            groupby = GroupBy(self.resolver)
+            GroupByRef.set(builder, groupby)
+        # first call for this builder: load watchers and process all records
+        self._load_quick_config(groupby)
+        # let other plugins register their @groupby.watch functions
+        self.emit('before-build-all', groupby=groupby, builder=builder)
+        groupby.queue_all(builder)
+        return groupby
+
     def _load_quick_config(self, groupby: GroupBy) -> None:
         ''' Load config file quick listeners. '''
         config = self.get_config()
@@ -31,39 +73,9 @@ class GroupByPlugin(Plugin):
             split = config.get(key + '.split')  # type: str
 
             @watcher.grouping()
-            def _fn(args: GroupByCallbackArgs) -> Iterator[str]:
+            def _fn(args: 'GroupByCallbackArgs') -> Iterator[str]:
                 val = args.field
                 if isinstance(val, str):
                     val = map(str.strip, val.split(split)) if split else [val]
                 if isinstance(val, (list, map)):
                     yield from val
-
-    def _init_once(self, builder: Builder) -> GroupBy:
-        try:
-            return builder.__groupby  # type:ignore[attr-defined,no-any-return]
-        except AttributeError:
-            groupby = GroupBy()
-            builder.__groupby = groupby  # type: ignore[attr-defined]
-
-        self.resolver.reset()
-        self._load_quick_config(groupby)
-        # let other plugins register their @groupby.watch functions
-        self.emit('before-build-all', groupby=groupby, builder=builder)
-        self.config_dependencies = groupby.get_dependencies()
-        groupby.queue_all(builder)
-        groupby.make_cluster(builder, self.resolver)
-        return groupby
-
-    def on_before_build(self, builder: Builder, source: SourceObject,
-                        **extra: Any) -> None:
-        # before-build may be called before before-build-all (issue #1017)
-        # make sure it is evaluated immediatelly
-        if isinstance(source, Page):
-            self._init_once(builder)
-
-    def on_after_build_all(self, builder: Builder, **extra: object) -> None:
-        self._init_once(builder).build_all(builder)
-
-    def on_after_prune(self, builder: Builder, **extra: object) -> None:
-        # TODO: find a better way to prune unreferenced elements
-        prune(builder, VPATH)
diff --git a/lektor_groupby/pruner.py b/lektor_groupby/pruner.py
index b6d7172..43e9514 100644
--- a/lektor_groupby/pruner.py
+++ b/lektor_groupby/pruner.py
@@ -2,29 +2,36 @@
 Static collector for build-artifact urls.
 All non-tracked VPATH-urls will be pruned after build.
 '''
-from lektor.builder import Builder  # typing
 from lektor.reporter import reporter  # report_pruned_artifact
 from lektor.utils import prune_file_and_folder
-
-_cache = set()
-# Note: this var is static or otherwise two instances of
-#       this module would prune each others artifacts.
+from typing import TYPE_CHECKING, Set, Iterable
+if TYPE_CHECKING:
+    from lektor.builder import Builder
 
 
-def track_not_prune(url: str) -> None:
-    ''' Add url to build cache to prevent pruning. '''
-    _cache.add(url.lstrip('/'))
+def _normalize_url_cache(url_cache: Iterable[str]) -> Set[str]:
+    ''' Map "a/" -> "a/index.html" and strip leading slashes from urls. '''
+    return {
+        # directory urls are built as index.html artifacts
+        (url + 'index.html' if url.endswith('/') else url).lstrip('/')
+        for url in url_cache
+    }
 
 
-def prune(builder: Builder, vpath: str) -> None:
-    ''' Remove previously generated, unreferenced Artifacts. '''
+def prune(builder: 'Builder', vpath: str, url_cache: Iterable[str]) -> None:
+    '''
+    Remove previously generated, unreferenced Artifacts.
+    Urls in url_cache are normalized first: a trailing "/" is expanded to
+    "/index.html" and leading slashes are dropped ("blog/index.html").
+    '''
     vpath = '@' + vpath.lstrip('@')  # just in case of user error
     dest_path = builder.destination_path
+    url_cache = _normalize_url_cache(url_cache)
     con = builder.connect_to_database()
     try:
         with builder.new_build_state() as build_state:
             for url, file in build_state.iter_artifacts():
-                if url.lstrip('/') in _cache:
+                if url.lstrip('/') in url_cache:
                     continue  # generated in this build-run
                 infos = build_state.get_artifact_dependency_infos(url, [])
                 for artifact_name, _ in infos:
@@ -36,4 +43,3 @@ def prune(builder: Builder, vpath: str) -> None:
                     break  # there is only one VPATH-entry per source
     finally:
         con.close()
-    _cache.clear()
diff --git a/lektor_groupby/resolver.py b/lektor_groupby/resolver.py
index 1bab0ac..98b7162 100644
--- a/lektor_groupby/resolver.py
+++ b/lektor_groupby/resolver.py
@@ -1,11 +1,11 @@
-from lektor.db import Record
-from lektor.environment import Environment
-from lektor.sourceobj import SourceObject
+from lektor.db import Record  # isinstance
 from lektor.utils import build_url
-
-from typing import Dict, List, Tuple, Optional
-from .config import Config  # typing
-from .vobj import GroupBySource, VPATH
+from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, Iterable
+from .vobj import VPATH, GroupBySource
+if TYPE_CHECKING:
+    from lektor.environment import Environment
+    from lektor.sourceobj import SourceObject
+    from .config import Config
 
 
 class Resolver:
@@ -14,31 +14,18 @@ class Resolver:
     Init will subscribe to @urlresolver and @virtualpathresolver.
     '''
 
-    def __init__(self, env: Environment) -> None:
+    def __init__(self, env: 'Environment') -> None:
         self._data = {}  # type: Dict[str, Tuple[str, Config]]
+        env.urlresolver(self.resolve_server_path)
+        env.virtualpathresolver(VPATH.lstrip('@'))(self.resolve_virtual_path)
 
-        # Local server only: resolve /tag/rss/ -> /tag/rss/index.html
-        @env.urlresolver
-        def dev_server_path(node: SourceObject, pieces: List[str]) \
-                -> Optional[GroupBySource]:
-            if isinstance(node, Record):
-                rv = self._data.get(build_url([node.url_path] + pieces))
-                if rv:
-                    return GroupBySource(node, group=rv[0], config=rv[1])
-            return None
+    @property
+    def has_any(self) -> bool:
+        return bool(self._data)  # True if any virtual object is tracked
 
-        # Admin UI only: Prevent server error and null-redirect.
-        @env.virtualpathresolver(VPATH.lstrip('@'))
-        def virtual_path(node: SourceObject, pieces: List[str]) \
-                -> Optional[GroupBySource]:
-            if isinstance(node, Record) and len(pieces) >= 2:
-                path = node['_path']  # type: str
-                key, grp, *_ = pieces
-                for group, conf in self._data.values():
-                    if key == conf.key and path == conf.root:
-                        if conf.slugify(group) == grp:
-                            return GroupBySource(node, group, conf)
-            return None
+    @property
+    def files(self) -> Iterable[str]:
+        return self._data  # iterating the dict yields the tracked url paths
 
     def reset(self) -> None:
         ''' Clear previously recorded virtual objects. '''
@@ -48,3 +35,28 @@ class Resolver:
         ''' Track new virtual object (only if slug is set). '''
         if vobj.slug:
             self._data[vobj.url_path] = (vobj.group, vobj.config)
+
+    # ------------
+    #   Resolver
+    # ------------
+
+    def resolve_server_path(self, node: 'SourceObject', pieces: List[str]) \
+            -> Optional[GroupBySource]:
+        ''' Dev server only: map /tag/rss/ onto the tracked virtual object. '''
+        if not isinstance(node, Record):
+            return None
+        hit = self._data.get(build_url([node.url_path] + pieces))
+        return GroupBySource(node, group=hit[0], config=hit[1]) if hit \
+            else None
+
+    def resolve_virtual_path(self, node: 'SourceObject', pieces: List[str]) \
+            -> Optional[GroupBySource]:
+        ''' Admin UI only: Prevent server error and null-redirect. '''
+        if isinstance(node, Record) and len(pieces) >= 2:
+            path = node['_path']  # type: str
+            key, grp, *_ = pieces  # config key, group slug; rest is ignored
+            for group, conf in self._data.values():
+                if key == conf.key and path == conf.root:
+                    if conf.slugify(group) == grp:
+                        return GroupBySource(node, group, conf)
+        return None  # not a record, too few pieces, or no matching group
diff --git a/lektor_groupby/vobj.py b/lektor_groupby/vobj.py
index 017ad09..d9cb38a 100644
--- a/lektor_groupby/vobj.py
+++ b/lektor_groupby/vobj.py
@@ -1,16 +1,15 @@
 from lektor.build_programs import BuildProgram  # subclass
-from lektor.builder import Artifact  # typing
 from lektor.context import get_ctx
-from lektor.db import Record  # typing
 from lektor.environment import Expression
 from lektor.sourceobj import VirtualSourceObject  # subclass
 from lektor.utils import build_url
-
-from typing import Dict, List, Any, Optional, Iterator
-from weakref import WeakSet
-from .config import Config
-from .pruner import track_not_prune
+from typing import TYPE_CHECKING, Dict, List, Any, Optional, Iterator
+from .backref import VGroups
 from .util import report_config_error
+if TYPE_CHECKING:
+    from lektor.builder import Artifact
+    from lektor.db import Record
+    from .config import Config
 
 VPATH = '@groupby'  # potentially unsafe. All matching entries are pruned.
 
@@ -28,15 +27,16 @@ class GroupBySource(VirtualSourceObject):
 
     def __init__(
         self,
-        record: Record,
+        record: 'Record',
         group: str,
-        config: Config,
-        children: Optional[Dict[Record, List[Any]]] = None,
+        config: 'Config',
+        children: Optional[Dict['Record', List[Any]]] = None,
     ) -> None:
         super().__init__(record)
         self.key = config.slugify(group)
         self.group = group
         self.config = config
+        self._children = children or {}  # type: Dict[Record, List[Any]]
         # evaluate slug Expression
         if config.slug and '{key}' in config.slug:
             self.slug = config.slug.replace('{key}', self.key)
@@ -45,16 +45,12 @@ class GroupBySource(VirtualSourceObject):
             assert self.slug != Ellipsis, 'invalid config: ' + config.slug
         if self.slug and self.slug.endswith('/index.html'):
             self.slug = self.slug[:-10]
-        # make sure children are on the same pad
-        self._children = {}  # type: Dict[Record, List[Any]]
-        for child, extras in (children or {}).items():
-            if child.pad != record.pad:
-                child = record.pad.get(child.path)
-            self._children[child] = extras
-            VGroups.of(child).add(self)
         # extra fields
         for attr, expr in config.fields.items():
             setattr(self, attr, self._eval(expr, field='fields.' + attr))
+        # back-ref
+        for child in self._children:
+            VGroups.of(child).add(self)
 
     def _eval(self, value: Any, *, field: str) -> Any:
         ''' Internal only: evaluates Lektor config file field expression. '''
@@ -94,12 +90,12 @@ class GroupBySource(VirtualSourceObject):
     # -----------------------
 
     @property
-    def children(self) -> Dict[Record, List[Any]]:
+    def children(self) -> Dict['Record', List[Any]]:
         ''' Returns dict with page record key and (optional) extra value. '''
         return self._children
 
     @property
-    def first_child(self) -> Optional[Record]:
+    def first_child(self) -> Optional['Record']:
         ''' Returns first referencing page record. '''
         if self._children:
             return iter(self._children).__next__()
@@ -139,44 +135,6 @@ class GroupBySource(VirtualSourceObject):
             self.path, len(self._children))
 
 
-# -----------------------------------
-#          Reverse Reference
-# -----------------------------------
-
-class VGroups:
-    @staticmethod
-    def of(record: Record) -> WeakSet:
-        try:
-            wset = record.__vgroups  # type: ignore[attr-defined]
-        except AttributeError:
-            wset = WeakSet()
-            record.__vgroups = wset  # type: ignore[attr-defined]
-        return wset  # type: ignore[no-any-return]
-
-    @staticmethod
-    def iter(
-        record: Record,
-        *keys: str,
-        recursive: bool = False
-    ) -> Iterator[GroupBySource]:
-        ''' Extract all referencing groupby virtual objects from a page. '''
-        ctx = get_ctx()
-        # manage dependencies
-        if ctx:
-            for dep in ctx.env.plugins['groupby'].config_dependencies:
-                ctx.record_dependency(dep)
-        # find groups
-        proc_list = [record]
-        while proc_list:
-            page = proc_list.pop(0)
-            if recursive and hasattr(page, 'children'):
-                proc_list.extend(page.children)  # type: ignore[attr-defined]
-            for vobj in VGroups.of(page):
-                vobj.config.dependencies
-                if not keys or vobj.config.key in keys:
-                    yield vobj
-
-
 # -----------------------------------
 #           BuildProgram
 # -----------------------------------
@@ -190,9 +148,8 @@ class GroupByBuildProgram(BuildProgram):
             url += 'index.html'
         self.declare_artifact(url, sources=list(
             self.source.iter_source_filenames()))
-        track_not_prune(url)
 
-    def build_artifact(self, artifact: Artifact) -> None:
+    def build_artifact(self, artifact: 'Artifact') -> None:
         get_ctx().record_virtual_dependency(self.source)
         artifact.render_template_into(
             self.source.config.template, this=self.source)
diff --git a/lektor_groupby/watcher.py b/lektor_groupby/watcher.py
index 230b929..dc01d60 100644
--- a/lektor_groupby/watcher.py
+++ b/lektor_groupby/watcher.py
@@ -1,27 +1,17 @@
-from lektor.db import Database, Record  # typing
-from lektor.types.flow import Flow, FlowType
-from lektor.utils import bool_from_string
-
-from typing import Set, Dict, List, Tuple, Any, Union, NamedTuple
+from typing import TYPE_CHECKING, Dict, List, Tuple, Any, Union, NamedTuple
 from typing import Optional, Callable, Iterator, Generator
-from .vobj import GroupBySource
-from .config import Config
+from .model import ModelReader
 from .util import most_used_key
-
-
-# -----------------------------------
-#              Typing
-# -----------------------------------
-
-class FieldKeyPath(NamedTuple):
-    fieldKey: str
-    flowIndex: Optional[int] = None
-    flowKey: Optional[str] = None
+from .vobj import GroupBySource
+if TYPE_CHECKING:
+    from lektor.db import Database, Record
+    from .config import Config
+    from .model import FieldKeyPath
 
 
 class GroupByCallbackArgs(NamedTuple):
-    record: Record
-    key: FieldKeyPath
+    record: 'Record'
+    key: 'FieldKeyPath'
     field: Any  # lektor model data-field value
 
 
@@ -31,83 +21,15 @@ GroupingCallback = Callable[[GroupByCallbackArgs], Union[
 ]]
 
 
-# -----------------------------------
-#            ModelReader
-# -----------------------------------
-
-class GroupByModelReader:
-    ''' Find models and flow-models which contain attribute '''
-
-    def __init__(self, db: Database, attrib: str) -> None:
-        self._flows = {}  # type: Dict[str, Set[str]]
-        self._models = {}  # type: Dict[str, Dict[str, str]]
-        # find flow blocks containing attribute
-        for key, flow in db.flowblocks.items():
-            tmp1 = set(f.name for f in flow.fields
-                       if bool_from_string(f.options.get(attrib, False)))
-            if tmp1:
-                self._flows[key] = tmp1
-        # find models and flow-blocks containing attribute
-        for key, model in db.datamodels.items():
-            tmp2 = {}  # Dict[str, str]
-            for field in model.fields:
-                if bool_from_string(field.options.get(attrib, False)):
-                    tmp2[field.name] = '*'  # include all children
-                elif isinstance(field.type, FlowType) and self._flows:
-                    # only processed if at least one flow has attrib
-                    fbs = field.type.flow_blocks
-                    # if fbs == None, all flow-blocks are allowed
-                    if fbs is None or any(x in self._flows for x in fbs):
-                        tmp2[field.name] = '?'  # only some flow blocks
-            if tmp2:
-                self._models[key] = tmp2
-
-    def read(
-        self,
-        record: Record,
-        flatten: bool = False
-    ) -> Iterator[Tuple[FieldKeyPath, Any]]:
-        '''
-        Enumerate all fields of a Record with attrib = True.
-        Flows are either returned directly (flatten=False) or
-        expanded so that each flow-block is yielded (flatten=True)
-        '''
-        assert isinstance(record, Record)
-        for r_key, subs in self._models.get(record.datamodel.id, {}).items():
-            field = record[r_key]
-            if not field:
-                continue
-            if subs == '*':  # either normal field or flow type (all blocks)
-                if flatten and isinstance(field, Flow):
-                    for i, flow in enumerate(field.blocks):
-                        flowtype = flow['_flowblock']
-                        for f_key, block in flow._data.items():
-                            if f_key.startswith('_'):  # e.g., _flowblock
-                                continue
-                            yield FieldKeyPath(r_key, i, f_key), block
-                else:
-                    yield FieldKeyPath(r_key), field
-            else:  # always flow type (only some blocks)
-                for i, flow in enumerate(field.blocks):
-                    flowtype = flow['_flowblock']
-                    for f_key in self._flows.get(flowtype, []):
-                        yield FieldKeyPath(r_key, i, f_key), flow[f_key]
-
-
-# -----------------------------------
-#              Watcher
-# -----------------------------------
-
 class Watcher:
     '''
     Callback is called with (Record, FieldKeyPath, field-value).
     Callback may yield one or more (group, extra-info) tuples.
     '''
 
-    def __init__(self, config: Config) -> None:
+    def __init__(self, config: 'Config') -> None:
         self.config = config
-        self.flatten = True
-        self.callback = None  # type: GroupingCallback #type:ignore[assignment]
+        self._root = self.config.root
 
     def grouping(self, flatten: bool = True) \
             -> Callable[[GroupingCallback], None]:
@@ -122,28 +44,23 @@ class Watcher:
             self.callback = fn
         return _decorator
 
-    def initialize(self, db: Database) -> None:
+    def initialize(self, db: 'Database') -> None:
         ''' Reset internal state. You must initialize before each build! '''
         assert callable(self.callback), 'No grouping callback provided.'
-        self._root = self.config.root
-        self._model_reader = GroupByModelReader(db, attrib=self.config.key)
+        self._model_reader = ModelReader(db, self.config.key, self.flatten)
         self._state = {}  # type: Dict[str, Dict[Record, List[Any]]]
         self._group_map = {}  # type: Dict[str, List[str]]
-        self._processed = set()  # type: Set[str]
 
-    def should_process(self, node: Record) -> bool:
+    def should_process(self, node: 'Record') -> bool:
         ''' Check if record path is being watched. '''
         return node['_path'].startswith(self._root)
 
-    def process(self, record: Record) -> None:
+    def process(self, record: 'Record') -> None:
         '''
         Will iterate over all record fields and call the callback method.
         Each record is guaranteed to be processed only once.
         '''
-        if record.path in self._processed:
-            return
-        self._processed.add(record.path)
-        for key, field in self._model_reader.read(record, self.flatten):
+        for key, field in self._model_reader.read(record):
             _gen = self.callback(GroupByCallbackArgs(record, key, field))
             try:
                 obj = next(_gen)
@@ -161,10 +78,11 @@ class Watcher:
 
     def _persist(
         self,
-        record: Record,
-        key: FieldKeyPath,
+        record: 'Record',
+        key: 'FieldKeyPath',
         obj: Union[str, tuple]
     ) -> str:
+        ''' Update internal state. Return slugified string. '''
         group = obj if isinstance(obj, str) else obj[0]
         slug = self.config.slugify(group)
         # init group-key
@@ -176,14 +94,14 @@ class Watcher:
         # init group extras
         if record not in self._state[slug]:
             self._state[slug][record] = []
-        # (optional) append extra
+        # append extras (or default value)
         if isinstance(obj, tuple):
             self._state[slug][record].append(obj[1])
         else:
-            self._state[slug][record].append(key.flowKey or key.fieldKey)
+            self._state[slug][record].append(key.fieldKey)
         return slug
 
-    def iter_sources(self, root: Record) -> Iterator[GroupBySource]:
+    def iter_sources(self, root: 'Record') -> Iterator[GroupBySource]:
         ''' Prepare and yield GroupBySource elements. '''
         for key, children in self._state.items():
             group = most_used_key(self._group_map[key])
@@ -192,7 +110,6 @@ class Watcher:
         del self._model_reader
         del self._state
         del self._group_map
-        del self._processed
 
     def __repr__(self) -> str:
         return '<GroupByWatcher key="{}" enabled={} callback={}>'.format(