From 2eb8d820c05f780bf4ea08d590b275c31675b8e4 Mon Sep 17 00:00:00 2001
From: relikd
Date: Fri, 25 Mar 2022 17:05:03 +0100
Subject: [PATCH] First version

---
 README.md         | 135 +++++++++++++++
 lektor_groupby.py | 429 ++++++++++++++++++++++++++++++++++++++++++++++
 setup.py          |  43 +++++
 3 files changed, 607 insertions(+)
 create mode 100644 README.md
 create mode 100644 lektor_groupby.py
 create mode 100644 setup.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a57ddf7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,135 @@
+# Lektor Plugin: @groupby
+
+A generic grouping / clustering plugin. It can be used for tagging and similar tasks.
+
+
+## Usage: Simple Example
+
+Let's start with a simple example: adding a tags field to your model.
+Assume you have a `blog-entry.ini` model that is used for all children of the `/blog` path.
+
+
+#### `models/blog-entry.ini`
+
+```ini
+[fields.tags]
+label = Tags
+type = strings
+myvar = true
+
+[fields.body]
+label = Content
+type = markdown
+```
+
+Notice that we introduce a new attribute variable: `myvar = true`.
+The name can be anything here; we will come back to that later.
+The only thing that matters is that the value is a boolean set to true.
+
+Edit your blog entry and add these two new tags:
+
+```
+Awesome
+Latest News
+```
+
+Next, we need a plugin to add the groupby event listener.
+
+
+#### `packages/test/lektor_my_tags_plugin.py`
+
+```python
+from lektor.utils import slugify
+
+def on_groupby_init(self, groupby, **extra):
+    @groupby.watch('/blog', 'myvar', flatten=True, template='myvar.html',
+                   slug='tag/{group}/index.html')
+    def do_myvar(args):
+        page = args.record  # extract additional info from source
+        fieldKey, flowIndex, flowKey = args.key  # or get field index directly
+        # val = page.get(fieldKey).blocks[flowIndex].get(flowKey)
+        value = args.field  # list type since model is 'strings' type
+        for tag in value:
+            yield slugify(tag), {'val': tag, 'tags_in_page': len(value)}
+```
+
+There are a few important things here:
+
+1. The first parameter (`'/blog'`) is the root page of the groupby.
+   All results will be placed under this directory, e.g., `/blog/tag/awesome/` in our example.
+   You can also just use `/`, in which case the same path would be `/tag/awesome/`.
+   Or create multiple listeners, one for `/blog/` and another for `/projects/`, etc.
+2. The second parameter (`'myvar'`) must be the same attribute variable we used in our `blog-entry.ini` model.
+   The groupby plugin will traverse all models and search for this attribute name.
+3. `flatten` determines how Flow elements are processed.
+   If `False`, the callback `do_myvar()` is called once per Flow field (provided the field has the `myvar` attribute attached) and receives the whole Flow object.
+   If `True` (default), the callback is called individually for each field of each Flow block that carries the attribute.
+4. The template `myvar.html` is used to render the grouping page.
+   This parameter is optional.
+   If no explicit template is set, the default template `groupby-myvar.html` is used, where `myvar` is replaced with whatever attribute name you chose.
+5. Finally, the slug `tag/{group}/index.html` is where the result is placed.
+   The default value for this parameter is `{attrib}/{group}/index.html`.
+   In our case, the default path would resolve to `myvar/awesome/index.html`.
+   We explicitly chose to replace the default slug with our own, which ignores the attrib path component and instead puts the result pages inside the `/tag` directory.
+   (PS: you could also use, for example, `t/{group}.html`.)
+
+
+So much for the `args` parameter.
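+
+For the two example tags above, `do_myvar` is called once for the blog entry and yields roughly the following `(groupkey, extra-info)` pairs (the group keys come from Lektor's `slugify`):
+
+```python
+('awesome', {'val': 'Awesome', 'tags_in_page': 2})
+('latest-news', {'val': 'Latest News', 'tags_in_page': 2})
+```
+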
+The callback body **can** produce groupings but does not have to.
+If you choose to produce an entry, you have to `yield` a tuple pair of `(groupkey, extra-info)`.
+`groupkey` is used to combine & cluster pages and must be URL-safe.
+The `extra-info` is passed through to your template file.
+You can yield more than one entry per source, or filter / ignore pages by simply not yielding anything.
+Our simple example will generate the output files `tag/awesome/index.html` and `tag/latest-news/index.html`.
+
+Let's take a look at the HTML template next.
+
+
+#### `templates/myvar.html`
+
+```html

+Path: {{ this | url(absolute=True) }}
+<br />
+This is: {{this}}
+<ul>
+{%- for child in this.children %}
+  <li>Page: {{ child.record.path }}, Name: {{ child.extra.val }}, Tag count: {{ child.extra.tags_in_page }}</li>
+{%- endfor %}
+</ul>
+```
+
+Notice that we can use `child.record` to access the referenced page of the group cluster.
+`child.extra` contains the additional information we previously passed into the template.
+
+The final result of `tag/latest-news/index.html`:
+
+```
+Path: /tag/latest-news/
+This is:
+ - Page: /blog/barss, Name: Latest News, Tag count: 2
+```
+
+
+## Usage: A slightly more complex example
+
+```python
+from lektor.markdown import Markdown
+from lektor.types.formats import MarkdownDescriptor
+from lektor.utils import slugify
+import re
+_regex = re.compile(r'{{([^}]{1,32})}}')
+
+def on_groupby_init(self, groupby, **extra):
+    @groupby.watch('/', 'inlinetags', slug='tags/{group}/')
+    def convert_inlinetags(args):
+        arr = args.field if isinstance(args.field, list) else [args.field]
+        for obj in arr:
+            if isinstance(obj, (Markdown, MarkdownDescriptor)):
+                obj = obj.source
+            if isinstance(obj, str) and obj:
+                for match in _regex.finditer(obj):
+                    tag = match.group(1)
+                    yield slugify(tag), tag
+```
+
+This will find all model fields with attribute `inlinetags` and search them for in-text occurrences of `{{Tagname}}`, etc.
+This generic approach does not care what data type the field value is:
+`strings` fields will be expanded and enumerated, Markdown will be unpacked.
+You can combine this simple tag detector with text replacements to point to the actual tags page.
diff --git a/lektor_groupby.py b/lektor_groupby.py
new file mode 100644
index 0000000..0f3c9e0
--- /dev/null
+++ b/lektor_groupby.py
@@ -0,0 +1,429 @@
+# -*- coding: utf-8 -*-
+import lektor.db  # typing
+from lektor.build_programs import BuildProgram
+from lektor.builder import Artifact, Builder  # typing
+from lektor.pluginsystem import Plugin
+from lektor.reporter import reporter
+from lektor.sourceobj import SourceObject, VirtualSourceObject
+from lektor.types.flow import Flow, FlowType
+from lektor.utils import bool_from_string, build_url, prune_file_and_folder
+
+from typing import \
+    NewType, NamedTuple, Tuple, Dict, Set, List, Optional, Iterator, Callable
+
+VPATH = '@groupby'  # potentially unsafe. All matching entries are pruned.
+
+
+# -----------------------------------
+# Typing
+# -----------------------------------
+FieldValue = NewType('FieldValue', object)  # lektor model data-field value
+AttributeKey = NewType('AttributeKey', str)  # attribute of lektor model
+GroupKey = NewType('GroupKey', str)  # key of group-by
+
+
+class FieldKeyPath(NamedTuple):
+    fieldKey: str
+    flowIndex: Optional[int] = None
+    flowKey: Optional[str] = None
+
+
+class GroupByCallbackArgs(NamedTuple):
+    record: lektor.db.Record
+    key: FieldKeyPath
+    field: FieldValue
+
+
+class GroupByCallbackYield(NamedTuple):
+    key: GroupKey
+    extra: object
+
+
+GroupingCallback = Callable[[GroupByCallbackArgs],
+                            Iterator[GroupByCallbackYield]]
+
+
+class GroupProducer(NamedTuple):
+    attribute: AttributeKey
+    func: GroupingCallback
+    flatten: bool = True
+    template: Optional[str] = None
+    slug: Optional[str] = None
+
+
+class GroupComponent(NamedTuple):
+    record: lektor.db.Record
+    extra: object
+
+
+class UrlResolverConf(NamedTuple):
+    attribute: AttributeKey
+    group: GroupKey
+    slug: Optional[str] = None
+
+
+# -----------------------------------
+# Actual logic
+# -----------------------------------
+
+
+class GroupBySource(VirtualSourceObject):
+    '''
+    Holds information for a single group/cluster.
+    This object is accessible in your template file.
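+    (e.g., in Jinja: {{ this.group }}, {{ this.children }})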
+    Attributes: record, attribute, group, children, template, slug
+
+    :DEFAULTS:
+    template: "groupby-attribute.html"
+    slug: "{attrib}/{group}/index.html"
+    '''
+
+    def __init__(
+        self,
+        record: lektor.db.Record,
+        attribute: AttributeKey,
+        group: GroupKey,
+        children: List[GroupComponent] = [],
+        template: Optional[str] = None,  # default: "groupby-attribute.html"
+        slug: Optional[str] = None  # default: "{attrib}/{group}/index.html"
+    ):
+        super().__init__(record)
+        self.attribute = attribute
+        self.group = group
+        self.children = children
+        self.template = template or 'groupby-{}.html'.format(self.attribute)
+        # custom user path
+        slug = slug or '{attrib}/{group}/index.html'
+        slug = slug.replace('{attrib}', self.attribute)
+        slug = slug.replace('{group}', self.group)
+        if slug.endswith('/index.html'):
+            slug = slug[:-10]
+        self.slug = slug
+
+    @property
+    def path(self) -> str:
+        # Used in VirtualSourceInfo, used to prune VirtualObjects
+        return build_url([self.record.path, VPATH, self.attribute, self.group])
+
+    @property
+    def url_path(self) -> str:
+        return build_url([self.record.path, self.slug])
+
+    def iter_source_filenames(self) -> Iterator[str]:
+        for record, _ in self.children:
+            yield from record.iter_source_filenames()
+
+    def __str__(self) -> str:
+        txt = '<GroupBySource {}/{} children={}>'.format(
+            self.attribute, self.group, len(self.children))
+        return txt
+
+
+class GroupByBuildProgram(BuildProgram):
+    ''' Generates Build-Artifacts and writes files. '''
+
+    def produce_artifacts(self) -> None:
+        url = self.source.url_path
+        if url.endswith('/'):
+            url += 'index.html'
+        self.declare_artifact(url, sources=list(
+            self.source.iter_source_filenames()))
+        GroupByPruner.track(url)
+
+    def build_artifact(self, artifact: Artifact) -> None:
+        self.source.pad.db.track_record_dependency(self.source)
+        artifact.render_template_into(self.source.template, this=self.source)
+
+
+# -----------------------------------
+# Helper
+# -----------------------------------
+
+
+class GroupByPruner:
+    '''
+    Static collector for build-artifact urls.
+    All non-tracked VPATH-urls will be pruned after build.
+    '''
+    _cache: Set[str] = set()
+    # Note: this var is static, otherwise two instances of
+    # GroupByCreator would prune each other's artifacts.
+
+    @classmethod
+    def track(cls, url: str) -> None:
+        cls._cache.add(url.lstrip('/'))
+
+    @classmethod
+    def prune(cls, builder: Builder) -> None:
+        ''' Remove previously generated, unreferenced Artifacts. '''
+        dest_path = builder.destination_path
+        con = builder.connect_to_database()
+        try:
+            with builder.new_build_state() as build_state:
+                for url, file in build_state.iter_artifacts():
+                    if url.lstrip('/') in cls._cache:
+                        continue  # generated in this build-run
+                    infos = build_state.get_artifact_dependency_infos(url, [])
+                    for v_path, _ in infos:
+                        if VPATH not in v_path:
+                            continue  # we only care about groupby Virtuals
+                        reporter.report_pruned_artifact(url)
+                        prune_file_and_folder(file.filename, dest_path)
+                        build_state.remove_artifact(url)
+                        break  # there is only one VPATH-entry per source
+        finally:
+            con.close()
+        cls._cache.clear()
+
+
+# -----------------------------------
+# Main Component
+# -----------------------------------
+
+
+class GroupByCreator:
+    '''
+    Process all children with matching conditions under the specified page.
+    Creates a grouping of pages with similar (self-defined) attributes.
+    The grouping is performed only once per build (or manually invoked).
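+
+    Rough lifecycle (wired up in GroupByPlugin below): watch() registers the
+    grouping callbacks, initialize() scans all models for the watched
+    attributes, and make_cluster() yields one GroupBySource per group.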
+ ''' + + def __init__(self): + self._flows: Dict[AttributeKey, Dict[str, Set[str]]] = {} + self._models: Dict[AttributeKey, Dict[str, Dict[str, str]]] = {} + self._func: Dict[str, Set[GroupProducer]] = {} + self._resolve_map: Dict[str, UrlResolverConf] = {} # only for server + + # -------------- + # Initialize + # -------------- + + def initialize(self, db: lektor.db): + self._flows.clear() + self._models.clear() + self._resolve_map.clear() + for prod_list in self._func.values(): + for producer in prod_list: + self._register(db, producer.attribute) + + def _register(self, db: lektor.db, attrib: AttributeKey) -> None: + ''' Preparation: find models and flow-models which contain attrib ''' + if attrib in self._flows or attrib in self._models: + return # already added + # find flow blocks with attrib + _flows = {} # Dict[str, Set[str]] + for key, flow in db.flowblocks.items(): + tmp1 = set(f.name for f in flow.fields + if bool_from_string(f.options.get(attrib, False))) + if tmp1: + _flows[key] = tmp1 + # find models with attrib or flow-blocks containing attrib + _models = {} # Dict[str, Dict[str, str]] + for key, model in db.datamodels.items(): + tmp2 = {} # Dict[str, str] + for field in model.fields: + if bool_from_string(field.options.get(attrib, False)): + tmp2[field.name] = '*' # include all children + elif isinstance(field.type, FlowType): + if any(x in _flows for x in field.type.flow_blocks): + tmp2[field.name] = '?' # only some flow blocks + if tmp2: + _models[key] = tmp2 + + self._flows[attrib] = _flows + self._models[attrib] = _models + + # ---------------- + # Add Observer + # ---------------- + + def watch( + self, + root: str, + attrib: AttributeKey, *, + flatten: bool = True, # if False, dont explode FlowType + template: Optional[str] = None, # default: "groupby-attrib.html" + slug: Optional[str] = None # default: "{attrib}/{group}/index.html" + ) -> Callable[[GroupingCallback], None]: + ''' + Decorator to subscribe to attrib-elements. Converter for groupby(). + Refer to groupby() for further details. 
+
+        (record, field-key, field) -> (group-key, extra-info)
+
+        :DEFAULTS:
+        template: "groupby-attrib.html"
+        slug: "{attrib}/{group}/index.html"
+        '''
+        def _decorator(fn: GroupingCallback):
+            if root not in self._func:
+                self._func[root] = set()
+            self._func[root].add(
+                GroupProducer(attrib, fn, flatten, template, slug))
+
+        return _decorator
+
+    # ----------
+    # Helper
+    # ----------
+
+    def iter_record_fields(
+        self,
+        source: lektor.db.Record,
+        attrib: AttributeKey,
+        flatten: bool = False
+    ) -> Iterator[Tuple[FieldKeyPath, FieldValue]]:
+        ''' Enumerate all fields of a lektor.db.Record with attrib = True '''
+        assert isinstance(source, lektor.db.Record)
+        _flows = self._flows.get(attrib, {})
+        _models = self._models.get(attrib, {})
+
+        for r_key, subs in _models.get(source.datamodel.id, {}).items():
+            if subs == '*':  # either normal field or flow type (all blocks)
+                field = source[r_key]
+                if flatten and isinstance(field, Flow):
+                    for i, flow in enumerate(field.blocks):
+                        flowtype = flow['_flowblock']
+                        for f_key, block in flow._data.items():
+                            if f_key.startswith('_'):  # e.g., _flowblock
+                                continue
+                            yield FieldKeyPath(r_key, i, f_key), block
+                else:
+                    yield FieldKeyPath(r_key), field
+            else:  # always flow type (only some blocks)
+                for i, flow in enumerate(source[r_key].blocks):
+                    flowtype = flow['_flowblock']
+                    for f_key in _flows.get(flowtype, []):
+                        yield FieldKeyPath(r_key, i, f_key), flow[f_key]
+
+    def groupby(
+        self,
+        attrib: AttributeKey,
+        root: lektor.db.Record,
+        func: GroupingCallback,
+        flatten: bool = False,
+        incl_attachments: bool = True
+    ) -> Dict[GroupKey, List[GroupComponent]]:
+        '''
+        Traverse selected root record with all children and group by func.
+        Func is called with (record, FieldKeyPath, FieldValue).
+        Func may yield one or more (group-key, extra-info) tuples.
+
+        return {'group-key': [(record, extra-info), ...]}
+        '''
+        assert callable(func), 'no GroupingCallback provided'
+        assert isinstance(root, lektor.db.Record)
+        tmap = {}  # type: Dict[GroupKey, List[GroupComponent]]
+        recursive_list = [root]  # type: List[lektor.db.Record]
+        while recursive_list:
+            record = recursive_list.pop()
+            if hasattr(record, 'children'):
+                # recursive_list += record.children
+                recursive_list.extend(record.children)
+            if incl_attachments and hasattr(record, 'attachments'):
+                # recursive_list += record.attachments
+                recursive_list.extend(record.attachments)
+            for key, field in self.iter_record_fields(record, attrib, flatten):
+                for ret in func(GroupByCallbackArgs(record, key, field)) or []:
+                    assert isinstance(ret, (tuple, list)), \
+                        'Must return tuple (group-key, extra-info)'
+                    group_key, extras = ret
+                    if group_key not in tmap:
+                        tmap[group_key] = []
+                    tmap[group_key].append(GroupComponent(record, extras))
+        return tmap
+
+    # -----------------
+    # Create groups
+    # -----------------
+
+    def should_process(self, node: SourceObject) -> bool:
+        ''' Check if record path is being watched. '''
+        return isinstance(node, lektor.db.Record) \
+            and node.url_path in self._func
+
+    def make_cluster(self, root: lektor.db.Record) -> Iterator[GroupBySource]:
+        ''' Group by attrib and build Artifacts.
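+        Called via the @env.generator hook (groupby_generator below),
+        i.e. once per watched root record during a build.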
+        '''
+        assert isinstance(root, lektor.db.Record)
+        for attrib, fn, flat, temp, slug in self._func.get(root.url_path, []):
+            groups = self.groupby(attrib, root, func=fn, flatten=flat)
+            for group_key, children in groups.items():
+                obj = GroupBySource(root, attrib, group_key, children,
+                                    template=temp, slug=slug)
+                self.track_dev_server_path(obj)
+                yield obj

+    # ------------------
+    # Path resolving
+    # ------------------
+
+    def resolve_virtual_path(
+        self, node: SourceObject, pieces: List[str]
+    ) -> Optional[GroupBySource]:
+        ''' Given a @VPATH/attrib/groupkey path, determine url path. '''
+        if len(pieces) >= 2:
+            attrib: AttributeKey = pieces[0]  # type: ignore[assignment]
+            group: GroupKey = pieces[1]  # type: ignore[assignment]
+            for attr, _, _, _, slug in self._func.get(node.url_path, []):
+                if attr == attrib:
+                    # TODO: do we need to provide the template too?
+                    return GroupBySource(node, attr, group, slug=slug)
+        return None
+
+    def track_dev_server_path(self, sender: GroupBySource) -> None:
+        ''' Dev server only: Add target path to reverse artifact url lookup '''
+        self._resolve_map[sender.url_path] = \
+            UrlResolverConf(sender.attribute, sender.group, sender.slug)
+
+    def resolve_dev_server_path(
+        self, node: SourceObject, pieces: List[str]
+    ) -> Optional[GroupBySource]:
+        ''' Dev server only: Resolve actual url to virtual obj. '''
+        prev = self._resolve_map.get(build_url([node.url_path] + pieces))
+        if prev:
+            attrib, group, slug = prev
+            return GroupBySource(node, attrib, group, slug=slug)
+        return None
+
+
+# -----------------------------------
+# Plugin Entry
+# -----------------------------------
+
+
+class GroupByPlugin(Plugin):
+    name = 'GroupBy Plugin'
+    description = 'Cluster arbitrary records with field attribute keyword.'
+
+    def on_setup_env(self, **extra):
+        self.creator = GroupByCreator()
+        self.env.add_build_program(GroupBySource, GroupByBuildProgram)
+        # let other plugins register their @groupby.watch functions
+        self.emit('init', groupby=self.creator)
+
+        # resolve /tag/rss/ -> /tag/rss/index.html (local server only)
+        @self.env.urlresolver
+        def groupby_path_resolver(node, pieces):
+            if self.creator.should_process(node):
+                return self.creator.resolve_dev_server_path(node, pieces)
+
+        # use VPATH in templates: {{ '/@groupby/attrib/group' | url }}
+        @self.env.virtualpathresolver(VPATH.lstrip('@'))
+        def groupby_virtualpath_resolver(node, pieces):
+            if self.creator.should_process(node):
+                return self.creator.resolve_virtual_path(node, pieces)
+
+        # injection to generate GroupBy nodes when processing artifacts
+        @self.env.generator
+        def groupby_generator(node):
+            if self.creator.should_process(node):
+                yield from self.creator.make_cluster(node)
+
+    def on_before_build_all(self, builder, **extra):
+        # parse all models to detect attribs of listeners
+        self.creator.initialize(builder.pad.db)
+
+    def on_after_prune(self, builder, **extra):
+        # TODO: find better way to prune unreferenced elements
+        GroupByPruner.prune(builder)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..fb755b0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,43 @@
+from setuptools import setup
+
+with open('README.md') as fp:
+    longdesc = fp.read()
+
+setup(
+    name='lektor-groupby',
+    py_modules=['lektor_groupby'],
+    entry_points={
+        'lektor.plugins': [
+            'groupby = lektor_groupby:GroupByPlugin',
+        ]
+    },
+    author='relikd',
+    url='https://github.com/relikd/lektor-groupby-plugin',
+    version='0.8',
+    description='Cluster arbitrary records with field attribute keyword.',
+    long_description=longdesc,
+    long_description_content_type="text/markdown",
+    license='MIT',
+    python_requires='>=3.6',
+    keywords=[
+        'lektor',
+        'plugin',
+        'groupby',
+        'grouping',
+        'cluster',
+    ],
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Web Environment',
+        'Environment :: Plugins',
+        'Framework :: Lektor',
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3 :: Only',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+    ],
+)