From 960e06252e0f9aa06665ce5cf960cbbcdcd84c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Sat, 11 Oct 2025 17:07:34 +0200 Subject: [PATCH] refactor: code comments & cleaning --- src/feather/client.py | 2 +- src/feather/data.py | 86 ++++++++++++++++++++++++++---------------- src/feather/feather.py | 34 +++++++++-------- 3 files changed, 72 insertions(+), 50 deletions(-) diff --git a/src/feather/client.py b/src/feather/client.py index 7839423..012bd07 100644 --- a/src/feather/client.py +++ b/src/feather/client.py @@ -148,7 +148,7 @@ class TTRSession(ClientSession): self.ttrss.login() self.feeds = {} - def set_unread(self, article_ids: list[ArticleId], read: bool = True): + def set_read_flag(self, article_ids: list[ArticleId], read: bool = True): if read: self.ttrss.mark_read(article_ids) else: diff --git a/src/feather/data.py b/src/feather/data.py index 6535afc..572ac9d 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -12,27 +12,33 @@ from hashlib import sha256 from feather.config import Config -def escape_filename(config, filename): - return filename.translate(config.filename_translation) +def sanitize_filename( + config: Config, filename: str, insert_before_suffix: str = "" +) -> str: + """Escape invalid caracters and truncate the filename as per the configuration. + This operates on a single filename, not a path. + (insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)""" + filename = filename.translate(config.filename_translation) - -def truncate_filename(config, filename): max_filename_length = config.max_filename_length - filename_utf8 = filename.encode("utf-8") - if len(filename_utf8) <= max_filename_length: - return filename + filename_len = len(filename.encode("utf-8")) + insert_before_suffix_len = len(insert_before_suffix.encode("utf-8")) + if filename_len + insert_before_suffix_len <= max_filename_length: + path = Path(filename) + return f"{path.stem}{insert_before_suffix}{path.suffix}" else: suffix = Path(filename).suffix - max_basename_length = max_filename_length - len(suffix.encode("utf-8")) - cutoff = len( - filename.encode("utf-8")[:max_basename_length].decode( - "utf-8", errors="ignore" - ) + max_stem_length = ( + max_filename_length - insert_before_suffix_len - len(suffix.encode("utf-8")) ) - return filename[:cutoff] + "…" + suffix + cutoff = len( + filename.encode("utf-8")[:max_stem_length].decode("utf-8", errors="ignore") + ) + return filename[:cutoff] + "…" + insert_before_suffix + suffix -def format_datetime(config, timestamp): +def format_datetime(config: Config, timestamp: int) -> str: + """Format a timestamp according to the configuraiton""" return datetime.fromtimestamp(timestamp, config.timezone).strftime( config.time_format ) @@ -71,12 +77,12 @@ type ArticleId = int | str class Article(ABC): config: Config - json_path: Path + json_path: Path # JSON path # fields serialized into the JSON file # # no default value - id: ArticleId # article id + id: ArticleId # article unique id category: Category # feed category # no default value, computed by compute_fields published_formatted: str # article publication time (text) @@ -98,27 +104,27 @@ class Article(ABC): comments_url: str = "" # article comments URL language: str = "" # article language image_url: str = "" # article main image - - def get_html_path(self): + + def _get_html_path(self): config = self.config - category_directory = config.html_root + + # Category directory path + category_directory = self.config.html_root for category in self.category.parents: - category_directory /= escape_filename( + category_directory /= sanitize_filename( config, config.article_category_template.render(category.asdict()) ) - category_directory /= escape_filename( + category_directory /= sanitize_filename( config, config.article_category_template.render(self.category.asdict()) ) - html_name = truncate_filename( + # Filename + html_name = sanitize_filename( config, - escape_filename( - config, - config.article_filename_template.render(self.get_template_dict()), - ), + config.article_filename_template.render(self._get_template_dict()), ) - return category_directory / html_name + return html_path.relative_to(config.html_root) def compute_fields(self): config = self.config @@ -158,6 +164,7 @@ class Article(ABC): return d def write_json(self): + """Write the JSON file associated with this article. Error if it already exists.""" stored_fields = ( "id", "unread", @@ -189,45 +196,58 @@ class Article(ABC): json.dump(article_json, f) def delete_json(self): + """Delete the JSON file associated with this article.""" self.json_path.unlink() + def has_html(self): + """Check if the HTML file associated with the article exists on disk.""" + if self.html_path is None: + return False + else: + html_path = self.config.html_root / self.html_path + return html_path.exists() + def write_html(self): # Write HTML file for a JSON object config = self.config html_path = config.html_root / self.html_path - if html_path.exists(): # TODO: does this actually matter - print( - f"WARNING: a file already exist for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong." + if html_path.exists(): + raise Exception( + f"Unexpectedly tried to overwrite article file for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong." ) else: html_path.parent.mkdir(parents=True, exist_ok=True) with html_path.open("w") as f: - f.write(config.article_template.render(self.get_template_dict())) + f.write(config.article_template.render(self._get_template_dict())) # set accessed date to update time, modified to publication time os.utime(html_path, (max(self.updated, self.updated), self.published)) def delete_html(self, ignore_deleted=False): + """Delete the HTML file associated with this article.""" # Delete a HTML file for a JSON object html_path = self.config.html_root / self.html_path if not ignore_deleted or html_path.exists(): html_path.unlink() def write(self): - self.write_json() + """Write all the files associated with this article to disk.""" self.write_html() + self.write_json() def delete(self): + """Delete all the files associated with this article.""" self.delete_html(ignore_deleted=True) self.delete_json() def regenerate(self): + """Delete and rewrite all the files associated with this article using to the latest configuration.""" self.delete() # paths might change so we preemptively remove the old file self.compute_fields() # recompute formatted datetime & paths from the current configuration self.write() # rewrite JSON & HTML def was_updated(self, old_article: Article): """Returns true if the article is different from a previous version in a way that would require regeneration""" - return old_article.get_template_dict() != self.get_template_dict() + return old_article._get_template_dict() != self._get_template_dict() class FileArticle(Article): diff --git a/src/feather/feather.py b/src/feather/feather.py index e11f501..9b824f3 100755 --- a/src/feather/feather.py +++ b/src/feather/feather.py @@ -4,7 +4,7 @@ import asyncio import signal from feather.config import Config -from feather.client import GReaderSession, TTRSession, ClientSession +from feather.client import GReaderSession, TTRSession, ClientSession, ArticleId from feather.data import FileArticle @@ -32,6 +32,12 @@ class FeatherApp: ) return self._client_session + def iter_articles(self): + """Iterate over all the articles in local storage""" + config = self.config + for json_path in config.json_root.glob("*.json"): + yield FileArticle(config, json_path) + def remove_empty_categories(self): """Remove empty directories in the HTML directory""" config = self.config @@ -65,10 +71,8 @@ class FeatherApp: marked_as_read, marked_as_unread = 0, 0 to_mark_as_read = [] to_mark_as_unread = [] - for json_path in config.json_root.glob("*.json"): - article = FileArticle(config, json_path) - html_path = config.html_root / article.html_path - if not html_path.exists(): + for article in self.iter_articles(): + if not article.has_html(): if article.unread: to_mark_as_read.append(article.id) marked_as_read += 1 @@ -96,7 +100,7 @@ class FeatherApp: print("Synchronizing with server...") new_articles, updated_articles = 0, 0 - grabbed_article_paths = set() + grabbed_article_paths: set[ArticleId] = set() categories = client_session.list_categories() for category in categories: @@ -116,8 +120,8 @@ class FeatherApp: remaining = False for article in articles: + grabbed_article_paths.add(article.id) json_path = article.json_path - grabbed_article_paths.add(json_path) if not json_path.exists(): article.write() new_articles += 1 @@ -130,9 +134,9 @@ class FeatherApp: # Remove articles that we didn't get from the server but are in the JSON directory removed_articles = 0 - for article_path in config.json_root.glob("*.json"): - if article_path not in grabbed_article_paths: - FileArticle(config, article_path).delete() + for article in self.iter_articles(): + if article.id not in grabbed_article_paths: + article.delete() removed_articles += 1 print( @@ -187,13 +191,11 @@ class FeatherApp: def regenerate_files(self): """Regenerate all local files using local data only""" - config = self.config - for json_path in config.json_root.glob("*.json"): - FileArticle(config, json_path).regenerate() + for article in self.iter_articles(): + article.regenerate() def clear_data(self): """Delete all local data""" - config = self.config - for json_path in config.json_root.glob("*.json"): - FileArticle(config, json_path).delete() + for article in self.iter_articles(): + article.delete() self.remove_empty_categories()