From 22b2ea89d54cc43c4defddf2a59e95574746bd78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 14:30:42 +0200 Subject: [PATCH] feat: allow keeping read files for a configurable time in local storage --- src/feather/app.py | 18 ++++++++++++------ src/feather/client.py | 1 + src/feather/config.default.toml | 11 +++++++++-- src/feather/config.py | 3 +++ src/feather/data.py | 7 +++++-- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/feather/app.py b/src/feather/app.py index 27f90d7..3ef5697 100755 --- a/src/feather/app.py +++ b/src/feather/app.py @@ -6,6 +6,7 @@ from asyncio import Event from typing import Iterable from watchfiles import awatch from pathlib import Path +from datetime import datetime from feather.config import Config from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId @@ -109,13 +110,10 @@ class FeatherApp: to_mark_as_unread_id[i : i + config.articles_per_query], False ) - # regenerate/delete local file with new read/unread state + # regenerate local file with new read/unread state for article in to_mark_as_read: article.unread = False - if config.only_sync_unread_articles: - article.delete() - else: - article.regenerate() + article.regenerate() for article in to_mark_as_unread: article.unread = True article.regenerate() @@ -164,8 +162,16 @@ class FeatherApp: # Remove articles that we didn't get from the server but are in the JSON directory removed_articles = 0 + article_cutoff_timestamp = ( + datetime.now().timestamp() - config.keep_read_articles_for + ) for article in self.iter_articles(): - if article.id not in grabbed_article_paths: + if ( + # we sync all articles: remove all articles that aren't on the server + not config.only_sync_unread_articles + # we only sync unread: only remove articles that are too old + or article.last_write < article_cutoff_timestamp + ) and article.id not in grabbed_article_paths: article.delete() removed_articles += 1 
diff --git a/src/feather/client.py b/src/feather/client.py index 7940821..e5ca7b7 100644 --- a/src/feather/client.py +++ b/src/feather/client.py @@ -151,6 +151,7 @@ class GReaderArticle(Article): ## Tiny Tiny RSS API ## + # Monkey patch Headline.__init__ to skip timestamp to datetime conversion # Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime def Headline_init(self, attr, client): diff --git a/src/feather/config.default.toml b/src/feather/config.default.toml index ce3629b..1b12c4a 100644 --- a/src/feather/config.default.toml +++ b/src/feather/config.default.toml @@ -24,8 +24,16 @@ password = "password" # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST. articles_per_request = 0 # Set to true to only sync unread articles; Feather will not retrieve any read article from the server. +# If set to false, Feather will download ALL articles from the server, read and unread, on each synchronization. This might be a lot of data depending on how many read articles your server keeps. If you only want to keep recent read articles, look at the keep_read_articles_for setting below. # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES. only_sync_unread_articles = true +# How long in seconds to keep read articles in the local storage before deleting them. +# Once an article is removed, Feather can no longer: +# - mark it as unread when its article file is restored from the trash; +# - generate articles files for read articles if html.write_read_articles = true. +# If only_sync_unread_articles = false, this does nothing (since Feather always retrieves all read articles from the server). +# Can be set through the environment variable SERVER_KEEP_READ_ARTICLES_FOR. +keep_read_articles_for = 259200 [directories] # Data directory: path where the internal Feather data will be stored. 
@@ -37,8 +45,7 @@ reader = "reader" [html] # If set to true, Feather will also generate articles files for read articles. -# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing. -# The the mark-as-unread behavior will also change depending on this value: +# The mark-as-unread behavior will change depending on this value: # - if false, marking an article as unread requires its file to be recreated/restored from the trash; # - if true, marking an article as unread requires deleting its article file (same as mark-as-read). # Can be set through the environment variable HTML_WRITE_READ_ARTICLES. diff --git a/src/feather/config.py b/src/feather/config.py index 7f72f09..98d10f2 100644 --- a/src/feather/config.py +++ b/src/feather/config.py @@ -61,6 +61,9 @@ class Config: self.only_sync_unread_articles: bool = bool( get_config("server", "only_sync_unread_articles") ) + self.keep_read_articles_for: float = float( + get_config("server", "keep_read_articles_for") + ) self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.time_format: str = str(get_config("datetime", "format")) diff --git a/src/feather/data.py b/src/feather/data.py index 958f45e..a6af71f 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -16,7 +16,7 @@ from feather.config import Config def sanitize_filename( config: Config, filename: str, insert_before_suffix: str = "" ) -> str: - """Escape invalid caracters and truncate the filename as per the configuration. + """Escape invalid characters and truncate the filename as per the configuration. This operates on a single filename, not a path. 
(insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping).""" filename = filename.translate(config.filename_translation) @@ -39,7 +39,7 @@ def sanitize_filename( def format_datetime(config: Config, timestamp: int) -> str: - """Format a timestamp according to the configuraiton.""" + """Format a timestamp according to the configuration.""" if timestamp < 0: date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta( seconds=timestamp @@ -116,6 +116,7 @@ class Article(ABC): comments_url: str = "" # article comments URL language: str = "" # article language image_url: str = "" # article main image + last_write: int = 0 # last time this article file was written (timestamp) def _hash_id(self): return sha256(str(self.id).encode("utf-8")).hexdigest() @@ -197,6 +198,7 @@ class Article(ABC): "language", "image_url", "html_path", + "last_write", ) article_json = {field: getattr(self, field) for field in stored_fields} article_json["category"] = self.category.asdict() @@ -254,6 +256,7 @@ class Article(ABC): def write(self, recompute_paths=False): """Write all the files associated with this article to disk.""" + self.last_write = datetime.now().timestamp() if self.unread or self.config.write_read_articles: try: self._write_html(recompute_path=recompute_paths)