From 0fd5ec64586ced46b6c91ad07fe33c5886e80ac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Sat, 11 Oct 2025 17:08:22 +0200 Subject: [PATCH] feat: add hash to HTML filename in case of conflict --- README.md | 1 - src/feather/data.py | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 6ef2c7f..a66d13d 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,6 @@ You need Python 3.12 or newer. Then pip it up, as the kids say. - [ ] Write documentation - [ ] Use inotify for real-time article mark-as-read action - [ ] Share the fun somewhere -- [ ] Actually think about the issues created by the duplicate warning - [ ] Get article attachments - [ ] Test with FreshRSS diff --git a/src/feather/data.py b/src/feather/data.py index 572ac9d..7c01449 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -87,7 +87,8 @@ class Article(ABC): # no default value, computed by compute_fields published_formatted: str # article publication time (text) updated_formatted: str # article publication time (text) - html_path: str # html path, relative to the html_root directory + # no default value, computed on save_html + html_path: str = None # html path, relative to the html_root directory (None will force it to be recomputed on next save_html) # with default value unread: bool = True # if the article is unread title: str = "" # article title @@ -104,7 +105,7 @@ class Article(ABC): comments_url: str = "" # article comments URL language: str = "" # article language image_url: str = "" # article main image - + def _get_html_path(self): config = self.config @@ -123,22 +124,17 @@ class Article(ABC): config, config.article_filename_template.render(self._get_template_dict()), ) + html_path = category_directory / html_name + # Add hash if filename already exists + if html_path.exists(): + id_hash = sha256(str(self.id).encode("utf-8")).hexdigest()[:8] + html_path = html_path.parent / sanitize_filename( + config, html_path.name, insert_before_suffix=f".{id_hash}" + ) return html_path.relative_to(config.html_root) - def compute_fields(self): - config = self.config - self.updated_formatted = format_datetime(config, self.updated) - self.published_formatted = format_datetime(config, self.published) - self.json_path = ( - config.json_root - / f"{sha256(str(self.id).encode('utf-8')).hexdigest()}.json" - ) - self.html_path = str( - self.get_html_path().relative_to(config.html_root) - ) # TODO: do this dynamically on write, handle overwrite conflict at the same time - - def get_template_dict(self) -> dict: + def _get_template_dict(self) -> dict: template_fields = ( "id", "unread", @@ -163,6 +159,16 @@ class Article(ABC): d["category"] = self.category.asdict() return d + def compute_fields(self): + config = self.config + self.updated_formatted = format_datetime(config, self.updated) + self.published_formatted = format_datetime(config, self.published) + self.json_path = ( + config.json_root + / f"{sha256(str(self.id).encode('utf-8')).hexdigest()}.json" + ) + self.html_path = None + def write_json(self): """Write the JSON file associated with this article. Error if it already exists.""" stored_fields = ( @@ -208,8 +214,14 @@ class Article(ABC): return html_path.exists() def write_html(self): + """Write the HTML file associated with this article. Error if it already exists. + Note: this may recompute html_path, which is saved into the JSON - so make sure to save the JSON file _after_ the HTML file.""" # Write HTML file for a JSON object config = self.config + + if self.html_path is None: + self.html_path = str(self._get_html_path()) + html_path = config.html_root / self.html_path if html_path.exists(): raise Exception(