From 2eb8d820c05f780bf4ea08d590b275c31675b8e4 Mon Sep 17 00:00:00 2001
From: relikd
Date: Fri, 25 Mar 2022 17:05:03 +0100
Subject: [PATCH] First version

---
 README.md         | 135 +++++++++++++++
 lektor_groupby.py | 429 ++++++++++++++++++++++++++++++++++++++++++++++
 setup.py          |  43 +++++
 3 files changed, 607 insertions(+)
 create mode 100644 README.md
 create mode 100644 lektor_groupby.py
 create mode 100644 setup.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a57ddf7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,135 @@
+# Lektor Plugin: @groupby
+
+A generic grouping / clustering plugin. It can be used for tagging and similar tasks.
+
+
+## Usage: Simple Example
+
+Let's start with a simple example: adding a tags field to your model.
+Assume you have a `blog-entry.ini` model that is used for all children of the `/blog` path.
+
+
+#### `models/blog-entry.ini`
+
+```ini
+[fields.tags]
+label = Tags
+type = strings
+myvar = true
+
+[fields.body]
+label = Content
+type = markdown
+```
+
+Notice that we introduce a new attribute variable: `myvar = true`.
+The name can be anything here; we will come back to that later.
+The only thing that matters is that the value is a boolean set to true.
+
+Edit your blog entry and add these two new tags:
+
+```
+Awesome
+Latest News
+```
+
+Next, we need a plugin to add the groupby event listener.
+
+
+#### `packages/test/lektor_my_tags_plugin.py`
+
+```python
+from lektor.utils import slugify
+
+def on_groupby_init(self, groupby, **extra):
+    @groupby.watch('/blog', 'myvar', flatten=True, template='myvar.html',
+                   slug='tag/{group}/index.html')
+    def do_myvar(args):
+        page = args.record  # extract additional info from source
+        fieldKey, flowIndex, flowKey = args.key  # or get field index directly
+        # val = page.get(fieldKey).blocks[flowIndex].get(flowKey)
+        value = args.field  # list type since model is 'strings' type
+        for tag in value:
+            yield slugify(tag), {'val': tag, 'tags_in_page': len(value)}
+```
+
+There are a few important things here:
+
+1. The first parameter (`'/blog'`) is the root page of the groupby.
+   All results will be placed under this directory, e.g., `/blog/tag/awesome/` in our example.
+   You can also just use `/`, in which case the same path would be `/tag/awesome/`.
+   Or create multiple listeners, one for `/blog/` and another for `/projects/`, etc.
+2. The second parameter (`'myvar'`) must be the same attribute variable we used in our `blog-entry.ini` model.
+   The groupby plugin will traverse all models and search for this attribute name.
+3. `flatten` determines how Flow elements are processed.
+   If `False`, the callback `do_myvar()` is called once per Flow field (provided the field has the `myvar` attribute attached) and receives the whole Flow object.
+   If `True` (default), the callback is called individually for each field of each Flow block that carries the attribute.
+4. The template `myvar.html` is used to render the grouping page.
+   This parameter is optional.
+   If no explicit template is set, the default template `groupby-myvar.html` is used, where `myvar` is replaced with whatever attribute name you chose.
+5. Finally, the slug `tag/{group}/index.html` is where the result is placed.
+   The default value for this parameter is `{attrib}/{group}/index.html`.
+   In our case, the default path would resolve to `myvar/awesome/index.html`.
+   We explicitly chose to replace the default slug with our own, which ignores the attrib path component and instead puts the result pages inside the `/tag` directory.
+   (PS: you could also use, for example, `t/{group}.html`.)
+
+
+So much for the `args` parameter.
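+
+For the two example tags above, `do_myvar` is called once for the blog entry and yields roughly the following `(groupkey, extra-info)` pairs (the group keys come from Lektor's `slugify`):
+
+```python
+('awesome', {'val': 'Awesome', 'tags_in_page': 2})
+('latest-news', {'val': 'Latest News', 'tags_in_page': 2})
+```
+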
+The callback body **can** produce groupings but does not have to.
+If you choose to produce an entry, you have to `yield` a tuple pair of `(groupkey, extra-info)`.
+`groupkey` is used to combine & cluster pages and must be URL-safe.
+The `extra-info` is passed through to your template file.
+You can yield more than one entry per source, or filter / ignore pages by simply not yielding anything.
+Our simple example will generate the output files `tag/awesome/index.html` and `tag/latest-news/index.html`.
+
+Let's take a look at the HTML template next.
+
+
+#### `templates/myvar.html`
+
+```html

+Path: {{ this | url(absolute=True) }}
+<br />
+This is: {{this}}
+<ul>
+{%- for child in this.children %}
+  <li>Page: {{ child.record.path }}, Name: {{ child.extra.val }}, Tag count: {{ child.extra.tags_in_page }}</li>
+{%- endfor %}
+</ul>
+```
+
+Notice that we can use `child.record` to access the referenced page of the group cluster.
+`child.extra` contains the additional information we previously passed into the template.
+
+The final result of `tag/latest-news/index.html`:
+
+```
+Path: /tag/latest-news/
+This is:
+ - Page: /blog/barss, Name: Latest News, Tag count: 2
+```
+
+
+## Usage: A slightly more complex example
+
+```python
+from lektor.markdown import Markdown
+from lektor.types.formats import MarkdownDescriptor
+from lektor.utils import slugify
+import re
+_regex = re.compile(r'{{([^}]{1,32})}}')
+
+def on_groupby_init(self, groupby, **extra):
+    @groupby.watch('/', 'inlinetags', slug='tags/{group}/')
+    def convert_inlinetags(args):
+        arr = args.field if isinstance(args.field, list) else [args.field]
+        for obj in arr:
+            if isinstance(obj, (Markdown, MarkdownDescriptor)):
+                obj = obj.source
+            if isinstance(obj, str) and obj:
+                for match in _regex.finditer(obj):
+                    tag = match.group(1)
+                    yield slugify(tag), tag
+```
+
+This will find all model fields with attribute `inlinetags` and search them for in-text occurrences of `{{Tagname}}`, etc.
+This generic approach does not care what data type the field value is:
+`strings` fields will be expanded and enumerated, Markdown will be unpacked.
+You can combine this simple tag detector with text replacements to point to the actual tags page.
diff --git a/lektor_groupby.py b/lektor_groupby.py
new file mode 100644
index 0000000..0f3c9e0
--- /dev/null
+++ b/lektor_groupby.py
@@ -0,0 +1,429 @@
+# -*- coding: utf-8 -*-
+import lektor.db  # typing
+from lektor.build_programs import BuildProgram
+from lektor.builder import Artifact, Builder  # typing
+from lektor.pluginsystem import Plugin
+from lektor.reporter import reporter
+from lektor.sourceobj import SourceObject, VirtualSourceObject
+from lektor.types.flow import Flow, FlowType
+from lektor.utils import bool_from_string, build_url, prune_file_and_folder
+
+from typing import \
+    NewType, NamedTuple, Tuple, Dict, Set, List, Optional, Iterator, Callable
+
+VPATH = '@groupby'  # potentially unsafe. All matching entries are pruned.
+
+
+# -----------------------------------
+# Typing
+# -----------------------------------
+FieldValue = NewType('FieldValue', object)  # lektor model data-field value
+AttributeKey = NewType('AttributeKey', str)  # attribute of lektor model
+GroupKey = NewType('GroupKey', str)  # key of group-by
+
+
+class FieldKeyPath(NamedTuple):
+    fieldKey: str
+    flowIndex: Optional[int] = None
+    flowKey: Optional[str] = None
+
+
+class GroupByCallbackArgs(NamedTuple):
+    record: lektor.db.Record
+    key: FieldKeyPath
+    field: FieldValue
+
+
+class GroupByCallbackYield(NamedTuple):
+    key: GroupKey
+    extra: object
+
+
+GroupingCallback = Callable[[GroupByCallbackArgs],
+                            Iterator[GroupByCallbackYield]]
+
+
+class GroupProducer(NamedTuple):
+    attribute: AttributeKey
+    func: GroupingCallback
+    flatten: bool = True
+    template: Optional[str] = None
+    slug: Optional[str] = None
+
+
+class GroupComponent(NamedTuple):
+    record: lektor.db.Record
+    extra: object
+
+
+class UrlResolverConf(NamedTuple):
+    attribute: AttributeKey
+    group: GroupKey
+    slug: Optional[str] = None
+
+
+# -----------------------------------
+# Actual logic
+# -----------------------------------
+
+
+class GroupBySource(VirtualSourceObject):
+    '''
+    Holds information for a single group/cluster.
+    This object is accessible in your template file.
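+    (e.g., in Jinja: {{ this.group }}, {{ this.children }})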
+    Attributes: record, attribute, group, children, template, slug
+
+    :DEFAULTS:
+    template: "groupby-attribute.html"
+    slug: "{attrib}/{group}/index.html"
+    '''
+
+    def __init__(
+        self,
+        record: lektor.db.Record,
+        attribute: AttributeKey,
+        group: GroupKey,
+        children: List[GroupComponent] = [],
+        template: Optional[str] = None,  # default: "groupby-attribute.html"
+        slug: Optional[str] = None  # default: "{attrib}/{group}/index.html"
+    ):
+        super().__init__(record)
+        self.attribute = attribute
+        self.group = group
+        self.children = children
+        self.template = template or 'groupby-{}.html'.format(self.attribute)
+        # custom user path
+        slug = slug or '{attrib}/{group}/index.html'
+        slug = slug.replace('{attrib}', self.attribute)
+        slug = slug.replace('{group}', self.group)
+        if slug.endswith('/index.html'):
+            slug = slug[:-10]
+        self.slug = slug
+
+    @property
+    def path(self) -> str:
+        # Used in VirtualSourceInfo, used to prune VirtualObjects
+        return build_url([self.record.path, VPATH, self.attribute, self.group])
+
+    @property
+    def url_path(self) -> str:
+        return build_url([self.record.path, self.slug])
+
+    def iter_source_filenames(self) -> Iterator[str]:
+        for record, _ in self.children:
+            yield from record.iter_source_filenames()
+
+    def __str__(self) -> str:
+        txt = '<GroupBySource {}/{} children={}>'.format(
+            self.attribute, self.group, len(self.children))
+        return txt
+
+
+class GroupByBuildProgram(BuildProgram):
+    ''' Generates Build-Artifacts and writes files. '''
+
+    def produce_artifacts(self) -> None:
+        url = self.source.url_path
+        if url.endswith('/'):
+            url += 'index.html'
+        self.declare_artifact(url, sources=list(
+            self.source.iter_source_filenames()))
+        GroupByPruner.track(url)
+
+    def build_artifact(self, artifact: Artifact) -> None:
+        self.source.pad.db.track_record_dependency(self.source)
+        artifact.render_template_into(self.source.template, this=self.source)
+
+
+# -----------------------------------
+# Helper
+# -----------------------------------
+
+
+class GroupByPruner:
+    '''
+    Static collector for build-artifact urls.
+    All non-tracked VPATH-urls will be pruned after build.
+    '''
+    _cache: Set[str] = set()
+    # Note: this var is static, otherwise two instances of
+    # GroupByCreator would prune each other's artifacts.
+
+    @classmethod
+    def track(cls, url: str) -> None:
+        cls._cache.add(url.lstrip('/'))
+
+    @classmethod
+    def prune(cls, builder: Builder) -> None:
+        ''' Remove previously generated, unreferenced Artifacts. '''
+        dest_path = builder.destination_path
+        con = builder.connect_to_database()
+        try:
+            with builder.new_build_state() as build_state:
+                for url, file in build_state.iter_artifacts():
+                    if url.lstrip('/') in cls._cache:
+                        continue  # generated in this build-run
+                    infos = build_state.get_artifact_dependency_infos(url, [])
+                    for v_path, _ in infos:
+                        if VPATH not in v_path:
+                            continue  # we only care about groupby Virtuals
+                        reporter.report_pruned_artifact(url)
+                        prune_file_and_folder(file.filename, dest_path)
+                        build_state.remove_artifact(url)
+                        break  # there is only one VPATH-entry per source
+        finally:
+            con.close()
+        cls._cache.clear()
+
+
+# -----------------------------------
+# Main Component
+# -----------------------------------
+
+
+class GroupByCreator:
+    '''
+    Process all children with matching conditions under the specified page.
+    Creates a grouping of pages with similar (self-defined) attributes.
+    The grouping is performed only once per build (or manually invoked).
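+
+    Rough lifecycle (wired up in GroupByPlugin below): watch() registers the
+    grouping callbacks, initialize() scans all models for the watched
+    attributes, and make_cluster() yields one GroupBySource per group.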
+ ''' + + def __init__(self): + self._flows: Dict[AttributeKey, Dict[str, Set[str]]] = {} + self._models: Dict[AttributeKey, Dict[str, Dict[str, str]]] = {} + self._func: Dict[str, Set[GroupProducer]] = {} + self._resolve_map: Dict[str, UrlResolverConf] = {} # only for server + + # -------------- + # Initialize + # -------------- + + def initialize(self, db: lektor.db): + self._flows.clear() + self._models.clear() + self._resolve_map.clear() + for prod_list in self._func.values(): + for producer in prod_list: + self._register(db, producer.attribute) + + def _register(self, db: lektor.db, attrib: AttributeKey) -> None: + ''' Preparation: find models and flow-models which contain attrib ''' + if attrib in self._flows or attrib in self._models: + return # already added + # find flow blocks with attrib + _flows = {} # Dict[str, Set[str]] + for key, flow in db.flowblocks.items(): + tmp1 = set(f.name for f in flow.fields + if bool_from_string(f.options.get(attrib, False))) + if tmp1: + _flows[key] = tmp1 + # find models with attrib or flow-blocks containing attrib + _models = {} # Dict[str, Dict[str, str]] + for key, model in db.datamodels.items(): + tmp2 = {} # Dict[str, str] + for field in model.fields: + if bool_from_string(field.options.get(attrib, False)): + tmp2[field.name] = '*' # include all children + elif isinstance(field.type, FlowType): + if any(x in _flows for x in field.type.flow_blocks): + tmp2[field.name] = '?' # only some flow blocks + if tmp2: + _models[key] = tmp2 + + self._flows[attrib] = _flows + self._models[attrib] = _models + + # ---------------- + # Add Observer + # ---------------- + + def watch( + self, + root: str, + attrib: AttributeKey, *, + flatten: bool = True, # if False, dont explode FlowType + template: Optional[str] = None, # default: "groupby-attrib.html" + slug: Optional[str] = None # default: "{attrib}/{group}/index.html" + ) -> Callable[[GroupingCallback], None]: + ''' + Decorator to subscribe to attrib-elements. Converter for groupby(). + Refer to groupby() for further details. 
+
+        (record, field-key, field) -> (group-key, extra-info)
+
+        :DEFAULTS:
+        template: "groupby-attrib.html"
+        slug: "{attrib}/{group}/index.html"
+        '''
+        def _decorator(fn: GroupingCallback):
+            if root not in self._func:
+                self._func[root] = set()
+            self._func[root].add(
+                GroupProducer(attrib, fn, flatten, template, slug))
+
+        return _decorator
+
+    # ----------
+    # Helper
+    # ----------
+
+    def iter_record_fields(
+        self,
+        source: lektor.db.Record,
+        attrib: AttributeKey,
+        flatten: bool = False
+    ) -> Iterator[Tuple[FieldKeyPath, FieldValue]]:
+        ''' Enumerate all fields of a lektor.db.Record with attrib = True '''
+        assert isinstance(source, lektor.db.Record)
+        _flows = self._flows.get(attrib, {})
+        _models = self._models.get(attrib, {})
+
+        for r_key, subs in _models.get(source.datamodel.id, {}).items():
+            if subs == '*':  # either normal field or flow type (all blocks)
+                field = source[r_key]
+                if flatten and isinstance(field, Flow):
+                    for i, flow in enumerate(field.blocks):
+                        flowtype = flow['_flowblock']
+                        for f_key, block in flow._data.items():
+                            if f_key.startswith('_'):  # e.g., _flowblock
+                                continue
+                            yield FieldKeyPath(r_key, i, f_key), block
+                else:
+                    yield FieldKeyPath(r_key), field
+            else:  # always flow type (only some blocks)
+                for i, flow in enumerate(source[r_key].blocks):
+                    flowtype = flow['_flowblock']
+                    for f_key in _flows.get(flowtype, []):
+                        yield FieldKeyPath(r_key, i, f_key), flow[f_key]
+
+    def groupby(
+        self,
+        attrib: AttributeKey,
+        root: lektor.db.Record,
+        func: GroupingCallback,
+        flatten: bool = False,
+        incl_attachments: bool = True
+    ) -> Dict[GroupKey, List[GroupComponent]]:
+        '''
+        Traverse selected root record with all children and group by func.
+        Func is called with (record, FieldKeyPath, FieldValue).
+        Func may yield one or more (group-key, extra-info) tuples.
+
+        return {'group-key': [(record, extra-info), ...]}
+        '''
+        assert callable(func), 'no GroupingCallback provided'
+        assert isinstance(root, lektor.db.Record)
+        tmap = {}  # type: Dict[GroupKey, List[GroupComponent]]
+        recursive_list = [root]  # type: List[lektor.db.Record]
+        while recursive_list:
+            record = recursive_list.pop()
+            if hasattr(record, 'children'):
+                # recursive_list += record.children
+                recursive_list.extend(record.children)
+            if incl_attachments and hasattr(record, 'attachments'):
+                # recursive_list += record.attachments
+                recursive_list.extend(record.attachments)
+            for key, field in self.iter_record_fields(record, attrib, flatten):
+                for ret in func(GroupByCallbackArgs(record, key, field)) or []:
+                    assert isinstance(ret, (tuple, list)), \
+                        'Must return tuple (group-key, extra-info)'
+                    group_key, extras = ret
+                    if group_key not in tmap:
+                        tmap[group_key] = []
+                    tmap[group_key].append(GroupComponent(record, extras))
+        return tmap
+
+    # -----------------
+    # Create groups
+    # -----------------
+
+    def should_process(self, node: SourceObject) -> bool:
+        ''' Check if record path is being watched. '''
+        return isinstance(node, lektor.db.Record) \
+            and node.url_path in self._func
+
+    def make_cluster(self, root: lektor.db.Record) -> Iterator[GroupBySource]:
+        ''' Group by attrib and build Artifacts.
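+        Called via the @env.generator hook (groupby_generator below),
+        i.e. once per watched root record during a build.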
+        '''
+        assert isinstance(root, lektor.db.Record)
+        for attrib, fn, flat, temp, slug in self._func.get(root.url_path, []):
+            groups = self.groupby(attrib, root, func=fn, flatten=flat)
+            for group_key, children in groups.items():
+                obj = GroupBySource(root, attrib, group_key, children,
+                                    template=temp, slug=slug)
+                self.track_dev_server_path(obj)
+                yield obj

+    # ------------------
+    # Path resolving
+    # ------------------
+
+    def resolve_virtual_path(
+        self, node: SourceObject, pieces: List[str]
+    ) -> Optional[GroupBySource]:
+        ''' Given a @VPATH/attrib/groupkey path, determine url path. '''
+        if len(pieces) >= 2:
+            attrib: AttributeKey = pieces[0]  # type: ignore[assignment]
+            group: GroupKey = pieces[1]  # type: ignore[assignment]
+            for attr, _, _, _, slug in self._func.get(node.url_path, []):
+                if attr == attrib:
+                    # TODO: do we need to provide the template too?
+                    return GroupBySource(node, attr, group, slug=slug)
+        return None
+
+    def track_dev_server_path(self, sender: GroupBySource) -> None:
+        ''' Dev server only: Add target path to reverse artifact url lookup '''
+        self._resolve_map[sender.url_path] = \
+            UrlResolverConf(sender.attribute, sender.group, sender.slug)
+
+    def resolve_dev_server_path(
+        self, node: SourceObject, pieces: List[str]
+    ) -> Optional[GroupBySource]:
+        ''' Dev server only: Resolve actual url to virtual obj. '''
+        prev = self._resolve_map.get(build_url([node.url_path] + pieces))
+        if prev:
+            attrib, group, slug = prev
+            return GroupBySource(node, attrib, group, slug=slug)
+        return None
+
+
+# -----------------------------------
+# Plugin Entry
+# -----------------------------------
+
+
+class GroupByPlugin(Plugin):
+    name = 'GroupBy Plugin'
+    description = 'Cluster arbitrary records with field attribute keyword.'
+
+    def on_setup_env(self, **extra):
+        self.creator = GroupByCreator()
+        self.env.add_build_program(GroupBySource, GroupByBuildProgram)
+        # let other plugins register their @groupby.watch functions
+        self.emit('init', groupby=self.creator)
+
+        # resolve /tag/rss/ -> /tag/rss/index.html (local server only)
+        @self.env.urlresolver
+        def groupby_path_resolver(node, pieces):
+            if self.creator.should_process(node):
+                return self.creator.resolve_dev_server_path(node, pieces)
+
+        # use VPATH in templates: {{ '/@groupby/attrib/group' | url }}
+        @self.env.virtualpathresolver(VPATH.lstrip('@'))
+        def groupby_virtualpath_resolver(node, pieces):
+            if self.creator.should_process(node):
+                return self.creator.resolve_virtual_path(node, pieces)
+
+        # injection to generate GroupBy nodes when processing artifacts
+        @self.env.generator
+        def groupby_generator(node):
+            if self.creator.should_process(node):
+                yield from self.creator.make_cluster(node)
+
+    def on_before_build_all(self, builder, **extra):
+        # parse all models to detect attribs of listeners
+        self.creator.initialize(builder.pad.db)
+
+    def on_after_prune(self, builder, **extra):
+        # TODO: find better way to prune unreferenced elements
+        GroupByPruner.prune(builder)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..fb755b0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,43 @@
+from setuptools import setup
+
+with open('README.md') as fp:
+    longdesc = fp.read()
+
+setup(
+    name='lektor-groupby',
+    py_modules=['lektor_groupby'],
+    entry_points={
+        'lektor.plugins': [
+            'groupby = lektor_groupby:GroupByPlugin',
+        ]
+    },
+    author='relikd',
+    url='https://github.com/relikd/lektor-groupby-plugin',
+    version='0.8',
+    description='Cluster arbitrary records with field attribute keyword.',
+    long_description=longdesc,
+    long_description_content_type="text/markdown",
+    license='MIT',
+    python_requires='>=3.6',
+    keywords=[
+        'lektor',
+        'plugin',
+        'groupby',
+        'grouping',
+        'cluster',
+    ],
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Web Environment',
+        'Environment :: Plugins',
+        'Framework :: Lektor',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3 :: Only',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+    ],
+)