From 22b2ea89d54cc43c4defddf2a59e95574746bd78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= <fildadut@reuh.eu>
Date: Fri, 17 Oct 2025 14:30:42 +0200
Subject: [PATCH] feat: allow keeping read files for a configurable time in
 local storage

---
 src/feather/app.py              | 18 ++++++++++++------
 src/feather/client.py           |  1 +
 src/feather/config.default.toml | 11 +++++++++--
 src/feather/config.py           |  3 +++
 src/feather/data.py             |  7 +++++--
 5 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/feather/app.py b/src/feather/app.py
index 27f90d7..3ef5697 100755
--- a/src/feather/app.py
+++ b/src/feather/app.py
@@ -6,6 +6,7 @@ from asyncio import Event
 from typing import Iterable
 from watchfiles import awatch
 from pathlib import Path
+from datetime import datetime
 
 from feather.config import Config
 from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId
@@ -109,13 +110,10 @@ class FeatherApp:
                 to_mark_as_unread_id[i : i + config.articles_per_query], False
             )
 
-        # regenerate/delete local file with new read/unread state
+        # regenerate local file with new read/unread state
         for article in to_mark_as_read:
             article.unread = False
-            if config.only_sync_unread_articles:
-                article.delete()
-            else:
-                article.regenerate()
+            article.regenerate()
         for article in to_mark_as_unread:
             article.unread = True
             article.regenerate()
@@ -164,8 +162,16 @@ class FeatherApp:
 
         # Remove articles that we didn't get from the server but are in the JSON directory
         removed_articles = 0
+        article_cutoff_timestamp = (
+            datetime.now().timestamp() - config.keep_read_articles_for
+        )
         for article in self.iter_articles():
-            if article.id not in grabbed_article_paths:
+            if (
+                # we sync all articles: remove all articles that aren't on the server
+                not config.only_sync_unread_articles
+                # we only sync unread: only remove articles that are too old
+                or article.last_write < article_cutoff_timestamp
+            ) and article.id not in grabbed_article_paths:
                 article.delete()
                 removed_articles += 1
 
diff --git a/src/feather/client.py b/src/feather/client.py
index 7940821..e5ca7b7 100644
--- a/src/feather/client.py
+++ b/src/feather/client.py
@@ -151,6 +151,7 @@ class GReaderArticle(Article):
 
 ## Tiny Tiny RSS API ##
 
+
 # Monkey patch Headline.__init__ to skip timestamp to datetime conversion
 # Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime
 def Headline_init(self, attr, client):
diff --git a/src/feather/config.default.toml b/src/feather/config.default.toml
index ce3629b..1b12c4a 100644
--- a/src/feather/config.default.toml
+++ b/src/feather/config.default.toml
@@ -24,8 +24,16 @@ password = "password"
 # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST.
 articles_per_request = 0
 # Set to true to only sync unread articles; Feather will not retrieve any read article from the server.
+# If set to false, Feather will download ALL articles from the server, read and unread, on each synchronization. This might be a lot of data depending on how many read articles your server keeps. If you only want to keep recent read articles, look at the keep_read_articles_for setting below.
 # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES.
 only_sync_unread_articles = true
+# How long, in seconds, to keep read articles in the local storage before deleting them (the default, 259200, is 3 days).
+# Once an article is removed, Feather can no longer:
+# - mark it as unread when its article file is restored from the trash;
+# - generate article files for read articles if html.write_read_articles = true.
+# If only_sync_unread_articles = false, this does nothing (since Feather always retrieves all read articles from the server).
+# Can be set through the environment variable SERVER_KEEP_READ_ARTICLES_FOR.
+keep_read_articles_for = 259200
 
 [directories]
 # Data directory: path where the internal Feather data will be stored.
@@ -37,8 +45,7 @@ reader = "reader"
 
 [html]
 # If set to true, Feather will also generate articles files for read articles.
-# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing.
-# The the mark-as-unread behavior will also change depending on this value:
+# The mark-as-unread behavior will change depending on this value:
 # - if false, marking an article as unread requires its file to be recreated/restored from the trash;
 # - if true, marking an article as unread requires deleting its article file (same as mark-as-read).
 # Can be set through the environment variable HTML_WRITE_READ_ARTICLES.
diff --git a/src/feather/config.py b/src/feather/config.py
index 7f72f09..98d10f2 100644
--- a/src/feather/config.py
+++ b/src/feather/config.py
@@ -61,6 +61,9 @@ class Config:
         self.only_sync_unread_articles: bool = bool(
             get_config("server", "only_sync_unread_articles")
         )
+        self.keep_read_articles_for: float = float(
+            get_config("server", "keep_read_articles_for")
+        )
 
         self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
         self.time_format: str = str(get_config("datetime", "format"))
diff --git a/src/feather/data.py b/src/feather/data.py
index 958f45e..a6af71f 100644
--- a/src/feather/data.py
+++ b/src/feather/data.py
@@ -16,7 +16,7 @@ from feather.config import Config
 def sanitize_filename(
     config: Config, filename: str, insert_before_suffix: str = ""
 ) -> str:
-    """Escape invalid caracters and truncate the filename as per the configuration.
+    """Escape invalid characters and truncate the filename as per the configuration.
     This operates on a single filename, not a path.
     (insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)."""
     filename = filename.translate(config.filename_translation)
@@ -39,7 +39,7 @@ def sanitize_filename(
 
 
 def format_datetime(config: Config, timestamp: int) -> str:
-    """Format a timestamp according to the configuraiton."""
+    """Format a timestamp according to the configuration."""
     if timestamp < 0:
         date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta(
             seconds=timestamp
@@ -116,6 +116,7 @@ class Article(ABC):
     comments_url: str = ""  # article comments URL
     language: str = ""  # article language
     image_url: str = ""  # article main image
+    last_write: int = 0  # last time this article file was written (timestamp)
 
     def _hash_id(self):
         return sha256(str(self.id).encode("utf-8")).hexdigest()
@@ -197,6 +198,7 @@ class Article(ABC):
             "language",
             "image_url",
             "html_path",
+            "last_write",
         )
         article_json = {field: getattr(self, field) for field in stored_fields}
         article_json["category"] = self.category.asdict()
@@ -254,6 +256,7 @@ class Article(ABC):
 
     def write(self, recompute_paths=False):
         """Write all the files associated with this article to disk."""
+        self.last_write = datetime.now().timestamp()
         if self.unread or self.config.write_read_articles:
             try:
                 self._write_html(recompute_path=recompute_paths)