From 55916a45190479d3cf7184352dd956c4f2f6a31b Mon Sep 17 00:00:00 2001 From: relikd Date: Wed, 6 Apr 2022 20:52:53 +0200 Subject: [PATCH] fix duplicate vobj for same slug --- lektor_groupby/util.py | 16 +++++++++ lektor_groupby/watcher.py | 75 +++++++++++++++++---------------------- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/lektor_groupby/util.py b/lektor_groupby/util.py index a12f934..d051120 100644 --- a/lektor_groupby/util.py +++ b/lektor_groupby/util.py @@ -1,5 +1,8 @@ from lektor.reporter import reporter, style +from typing import List +from itertools import groupby + def report_config_error(key: str, field: str, val: str, e: Exception) -> None: ''' Send error message to Lektor reporter. Indicate which field is bad. ''' @@ -9,3 +12,16 @@ def report_config_error(key: str, field: str, val: str, e: Exception) -> None: reporter._write_line(style(msg, fg='red')) except Exception: print(msg) # fallback in case Lektor API changes + + +def most_used_key(keys: List[str]) -> str: + if len(keys) < 3: + return keys[0] # TODO: first vs last occurrence + best_count = 0 + best_key = '' + for key, itr in groupby(keys): + count = sum(1 for i in itr) + if count > best_count: # TODO: (>) vs (>=), first vs last occurrence + best_count = count + best_key = key + return best_key diff --git a/lektor_groupby/watcher.py b/lektor_groupby/watcher.py index 56d4a6f..d166103 100644 --- a/lektor_groupby/watcher.py +++ b/lektor_groupby/watcher.py @@ -3,9 +3,10 @@ from lektor.types.flow import Flow, FlowType from lektor.utils import bool_from_string from typing import Set, Dict, List, Tuple, Any, Union, NamedTuple -from typing import Optional, Callable, Iterable, Iterator, Generator +from typing import Optional, Callable, Iterator, Generator from .vobj import GroupBySource from .config import Config +from .util import most_used_key # ----------------------------------- @@ -91,36 +92,6 @@ class GroupByModelReader: yield FieldKeyPath(r_key, i, f_key), flow[f_key] -# ----------------------------------- -# State -# ----------------------------------- - -class GroupByState: - ''' Store and update a groupby build state. {group: {record: [extras]}} ''' - - def __init__(self) -> None: - self.state = {} # type: Dict[str, Dict[Record, List[Any]]] - self._processed = set() # type: Set[Record] - - def __contains__(self, record: Record) -> bool: - ''' Returns True if record was already processed. ''' - return record.path in self._processed - - def items(self) -> Iterable[Tuple[str, Dict[Record, List[Any]]]]: - ''' Iterable with (group, {record: [extras]}) tuples. ''' - return self.state.items() - - def add(self, record: Record, sub_groups: Dict[str, List[Any]]) -> None: - ''' Append groups if not processed already. {group: [extras]} ''' - if record.path not in self._processed: - self._processed.add(record.path) - for group, extras in sub_groups.items(): - if group in self.state: - self.state[group][record] = extras - else: - self.state[group] = {record: extras} - - # ----------------------------------- # Watcher # ----------------------------------- @@ -153,8 +124,10 @@ class Watcher: ''' Reset internal state. You must initialize before each build! ''' assert callable(self.callback), 'No grouping callback provided.' self._root = self.config.root - self._state = GroupByState() self._model_reader = GroupByModelReader(db, attrib=self.config.key) + self._state = {} # type: Dict[str, Dict[Record, List[Any]]] + self._group_map = {} # type: Dict[str, List[str]] + self._processed = set() # type: Set[str] def should_process(self, node: Record) -> bool: ''' Check if record path is being watched. ''' @@ -165,9 +138,9 @@ class Watcher: Will iterate over all record fields and call the callback method. Each record is guaranteed to be processed only once. ''' - if record in self._state: + if record.path in self._processed: return - tmp = {} # type: Dict[str, List[Any]] # {group: [extras]} + self._processed.add(record.path) for key, field in self._model_reader.read(record, self.flatten): _gen = self.callback(GroupByCallbackArgs(record, key, field)) try: @@ -175,24 +148,42 @@ class Watcher: while True: if not isinstance(obj, (str, tuple)): raise TypeError(f'Unsupported groupby yield: {obj}') - group = obj if isinstance(obj, str) else obj[0] - if group not in tmp: - tmp[group] = [] - if isinstance(obj, tuple): - tmp[group].append(obj[1]) + slug = self._persist(record, obj) # return slugified group key and continue iteration if isinstance(_gen, Generator) and not _gen.gi_yieldfrom: - obj = _gen.send(self.config.slugify(group)) + obj = _gen.send(slug) else: obj = next(_gen) except StopIteration: del _gen - self._state.add(record, tmp) + + def _persist(self, record: Record, obj: Union[str, tuple]) -> str: + group = obj if isinstance(obj, str) else obj[0] + slug = self.config.slugify(group) + # init group-key + if slug not in self._state: + self._state[slug] = {} + self._group_map[slug] = [] + # _group_map is later used to find most used group + self._group_map[slug].append(group) + # init group extras + if record not in self._state[slug]: + self._state[slug][record] = [] + # (optional) append extra + if isinstance(obj, tuple): + self._state[slug][record].append(obj[1]) + return slug def iter_sources(self, root: Record) -> Iterator[GroupBySource]: ''' Prepare and yield GroupBySource elements. ''' - for group, children in self._state.items(): + for key, children in self._state.items(): + group = most_used_key(self._group_map[key]) yield GroupBySource(root, group, self.config, children=children) + # cleanup. remove this code if you'd like to iter twice + del self._model_reader + del self._state + del self._group_map + del self._processed def __repr__(self) -> str: return ''.format(