From 960e06252e0f9aa06665ce5cf960cbbcdcd84c8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= <fildadut@reuh.eu>
Date: Sat, 11 Oct 2025 17:07:34 +0200
Subject: [PATCH] refactor: code comments & cleaning

---
 src/feather/client.py  |  2 +-
 src/feather/data.py    | 86 ++++++++++++++++++++++++++----------------
 src/feather/feather.py | 34 +++++++++--------
 3 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/src/feather/client.py b/src/feather/client.py
index 7839423..012bd07 100644
--- a/src/feather/client.py
+++ b/src/feather/client.py
@@ -148,7 +148,7 @@ class TTRSession(ClientSession):
         self.ttrss.login()
         self.feeds = {}
 
-    def set_unread(self, article_ids: list[ArticleId], read: bool = True):
+    def set_read_flag(self, article_ids: list[ArticleId], read: bool = True):
         if read:
             self.ttrss.mark_read(article_ids)
         else:
diff --git a/src/feather/data.py b/src/feather/data.py
index 6535afc..572ac9d 100644
--- a/src/feather/data.py
+++ b/src/feather/data.py
@@ -12,27 +12,33 @@ from hashlib import sha256
 from feather.config import Config
 
 
-def escape_filename(config, filename):
-    return filename.translate(config.filename_translation)
+def sanitize_filename(
+    config: Config, filename: str, insert_before_suffix: str = ""
+) -> str:
+    """Escape invalid caracters and truncate the filename as per the configuration.
+    This operates on a single filename, not a path.
+    (insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)"""
+    filename = filename.translate(config.filename_translation)
 
-
-def truncate_filename(config, filename):
     max_filename_length = config.max_filename_length
-    filename_utf8 = filename.encode("utf-8")
-    if len(filename_utf8) <= max_filename_length:
-        return filename
+    filename_len = len(filename.encode("utf-8"))
+    insert_before_suffix_len = len(insert_before_suffix.encode("utf-8"))
+    if filename_len + insert_before_suffix_len <= max_filename_length:
+        path = Path(filename)
+        return f"{path.stem}{insert_before_suffix}{path.suffix}"
     else:
         suffix = Path(filename).suffix
-        max_basename_length = max_filename_length - len(suffix.encode("utf-8"))
-        cutoff = len(
-            filename.encode("utf-8")[:max_basename_length].decode(
-                "utf-8", errors="ignore"
-            )
+        max_stem_length = (
+            max_filename_length - insert_before_suffix_len - len(suffix.encode("utf-8"))
         )
-        return filename[:cutoff] + "…" + suffix
+        cutoff = len(
+            filename.encode("utf-8")[:max_stem_length].decode("utf-8", errors="ignore")
+        )
+        return filename[:cutoff] + "…" + insert_before_suffix + suffix
 
 
-def format_datetime(config, timestamp):
+def format_datetime(config: Config, timestamp: int) -> str:
+    """Format a timestamp according to the configuraiton"""
     return datetime.fromtimestamp(timestamp, config.timezone).strftime(
         config.time_format
     )
@@ -71,12 +77,12 @@ type ArticleId = int | str
 
 class Article(ABC):
     config: Config
-    json_path: Path
+    json_path: Path  # JSON path
 
     # fields serialized into the JSON file #
 
     # no default value
-    id: ArticleId  # article id
+    id: ArticleId  # article unique id
     category: Category  # feed category
     # no default value, computed by compute_fields
     published_formatted: str  # article publication time (text)
@@ -98,27 +104,27 @@ class Article(ABC):
     comments_url: str = ""  # article comments URL
     language: str = ""  # article language
     image_url: str = ""  # article main image
-
-    def get_html_path(self):
+    
+    def _get_html_path(self):
         config = self.config
-        category_directory = config.html_root
+
+        # Category directory path
+        category_directory = self.config.html_root
         for category in self.category.parents:
-            category_directory /= escape_filename(
+            category_directory /= sanitize_filename(
                 config, config.article_category_template.render(category.asdict())
             )
-        category_directory /= escape_filename(
+        category_directory /= sanitize_filename(
             config, config.article_category_template.render(self.category.asdict())
         )
 
-        html_name = truncate_filename(
+        # Filename
+        html_name = sanitize_filename(
             config,
-            escape_filename(
-                config,
-                config.article_filename_template.render(self.get_template_dict()),
-            ),
+            config.article_filename_template.render(self._get_template_dict()),
         )
 
-        return category_directory / html_name
+        return html_path.relative_to(config.html_root)
 
     def compute_fields(self):
         config = self.config
@@ -158,6 +164,7 @@ class Article(ABC):
         return d
 
     def write_json(self):
+        """Write the JSON file associated with this article. Error if it already exists."""
         stored_fields = (
             "id",
             "unread",
@@ -189,45 +196,58 @@ class Article(ABC):
             json.dump(article_json, f)
 
     def delete_json(self):
+        """Delete the JSON file associated with this article."""
         self.json_path.unlink()
 
+    def has_html(self):
+        """Check if the HTML file associated with the article exists on disk."""
+        if self.html_path is None:
+            return False
+        else:
+            html_path = self.config.html_root / self.html_path
+            return html_path.exists()
+
     def write_html(self):
         # Write HTML file for a JSON object
         config = self.config
         html_path = config.html_root / self.html_path
-        if html_path.exists():  # TODO: does this actually matter
-            print(
-                f"WARNING: a file already exist for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong."
+        if html_path.exists():
+            raise Exception(
+                f"Unexpectedly tried to overwrite article file for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong."
             )
         else:
             html_path.parent.mkdir(parents=True, exist_ok=True)
             with html_path.open("w") as f:
-                f.write(config.article_template.render(self.get_template_dict()))
+                f.write(config.article_template.render(self._get_template_dict()))
             # set accessed date to update time, modified to publication time
             os.utime(html_path, (max(self.updated, self.updated), self.published))
 
     def delete_html(self, ignore_deleted=False):
+        """Delete the HTML file associated with this article."""
         # Delete a HTML file for a JSON object
         html_path = self.config.html_root / self.html_path
         if not ignore_deleted or html_path.exists():
             html_path.unlink()
 
     def write(self):
-        self.write_json()
+        """Write all the files associated with this article to disk."""
         self.write_html()
+        self.write_json()
 
     def delete(self):
+        """Delete all the files associated with this article."""
         self.delete_html(ignore_deleted=True)
         self.delete_json()
 
     def regenerate(self):
+        """Delete and rewrite all the files associated with this article using to the latest configuration."""
         self.delete()  # paths might change so we preemptively remove the old file
         self.compute_fields()  # recompute formatted datetime & paths from the current configuration
         self.write()  # rewrite JSON & HTML
 
     def was_updated(self, old_article: Article):
         """Returns true if the article is different from a previous version in a way that would require regeneration"""
-        return old_article.get_template_dict() != self.get_template_dict()
+        return old_article._get_template_dict() != self._get_template_dict()
 
 
 class FileArticle(Article):
diff --git a/src/feather/feather.py b/src/feather/feather.py
index e11f501..9b824f3 100755
--- a/src/feather/feather.py
+++ b/src/feather/feather.py
@@ -4,7 +4,7 @@ import asyncio
 import signal
 
 from feather.config import Config
-from feather.client import GReaderSession, TTRSession, ClientSession
+from feather.client import GReaderSession, TTRSession, ClientSession, ArticleId
 from feather.data import FileArticle
 
 
@@ -32,6 +32,12 @@ class FeatherApp:
                 )
         return self._client_session
 
+    def iter_articles(self):
+        """Iterate over all the articles in local storage"""
+        config = self.config
+        for json_path in config.json_root.glob("*.json"):
+            yield FileArticle(config, json_path)
+
     def remove_empty_categories(self):
         """Remove empty directories in the HTML directory"""
         config = self.config
@@ -65,10 +71,8 @@ class FeatherApp:
         marked_as_read, marked_as_unread = 0, 0
         to_mark_as_read = []
         to_mark_as_unread = []
-        for json_path in config.json_root.glob("*.json"):
-            article = FileArticle(config, json_path)
-            html_path = config.html_root / article.html_path
-            if not html_path.exists():
+        for article in self.iter_articles():
+            if not article.has_html():
                 if article.unread:
                     to_mark_as_read.append(article.id)
                     marked_as_read += 1
@@ -96,7 +100,7 @@ class FeatherApp:
         print("Synchronizing with server...")
 
         new_articles, updated_articles = 0, 0
-        grabbed_article_paths = set()
+        grabbed_article_paths: set[ArticleId] = set()
 
         categories = client_session.list_categories()
         for category in categories:
@@ -116,8 +120,8 @@ class FeatherApp:
                     remaining = False
 
                 for article in articles:
+                    grabbed_article_paths.add(article.id)
                     json_path = article.json_path
-                    grabbed_article_paths.add(json_path)
                     if not json_path.exists():
                         article.write()
                         new_articles += 1
@@ -130,9 +134,9 @@ class FeatherApp:
 
         # Remove articles that we didn't get from the server but are in the JSON directory
         removed_articles = 0
-        for article_path in config.json_root.glob("*.json"):
-            if article_path not in grabbed_article_paths:
-                FileArticle(config, article_path).delete()
+        for article in self.iter_articles():
+            if article.id not in grabbed_article_paths:
+                article.delete()
                 removed_articles += 1
 
         print(
@@ -187,13 +191,11 @@ class FeatherApp:
 
     def regenerate_files(self):
         """Regenerate all local files using local data only"""
-        config = self.config
-        for json_path in config.json_root.glob("*.json"):
-            FileArticle(config, json_path).regenerate()
+        for article in self.iter_articles():
+            article.regenerate()
 
     def clear_data(self):
         """Delete all local data"""
-        config = self.config
-        for json_path in config.json_root.glob("*.json"):
-            FileArticle(config, json_path).delete()
+        for article in self.iter_articles():
+            article.delete()
         self.remove_empty_categories()