feat: allow keeping read files for a configurable time in local storage

2026-02-04 03:38:38 +00:00 · 2025-10-17 14:30:42 +02:00 · 2025-10-17 14:30:42 +02:00 · 22b2ea89d5
commit 22b2ea89d5
parent b5474cb376
5 changed files with 30 additions and 10 deletions
--- a/src/feather/app.py
+++ b/src/feather/app.py
@ -6,6 +6,7 @@ from asyncio import Event
 from typing import Iterable
 from watchfiles import awatch
 from pathlib import Path
+from datetime import datetime

 from feather.config import Config
 from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId
@ -109,13 +110,10 @@ class FeatherApp:
                to_mark_as_unread_id[i : i + config.articles_per_query], False
            )

-        # regenerate/delete local file with new read/unread state
+        # regenerate local file with new read/unread state
        for article in to_mark_as_read:
            article.unread = False
-            if config.only_sync_unread_articles:
-                article.delete()
-            else:
-                article.regenerate()
+            article.regenerate()
        for article in to_mark_as_unread:
            article.unread = True
            article.regenerate()
@ -164,8 +162,16 @@ class FeatherApp:

        # Remove articles that we didn't get from the server but are in the JSON directory
        removed_articles = 0
+        article_cutoff_timestamp = (
+            datetime.now().timestamp() - config.keep_read_articles_for
+        )
        for article in self.iter_articles():
-            if article.id not in grabbed_article_paths:
+            if (
+                # we sync all articles: remove all articles that aren't on the server
+                not config.only_sync_unread_articles
+                # we only sync unread: only remove articles that are too old
+                or article.last_write < article_cutoff_timestamp
+            ) and article.id not in grabbed_article_paths:
                article.delete()
                removed_articles += 1

--- a/src/feather/client.py
+++ b/src/feather/client.py
@ -151,6 +151,7 @@ class GReaderArticle(Article):

 ## Tiny Tiny RSS API ##

+
 # Monkey patch Headline.__init__ to skip timestamp to datetime conversion
 # Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime
 def Headline_init(self, attr, client):
--- a/src/feather/config.default.toml
+++ b/src/feather/config.default.toml
@ -24,8 +24,16 @@ password = "password"
 # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST.
 articles_per_request = 0
 # Set to true to only sync unread articles; Feather will not retrieve any read article from the server.
+# If set to false, Feather will download ALL articles from the server, read and unread, on each synchronization. This might be a lot of data depending on how many read articles your server keeps. If you only want to keep recent read articles, look at the keep_read_articles_for setting below.
 # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES.
 only_sync_unread_articles = true
+# How long in seconds to keep read articles in the local storage before deleting them.
+# Once an article is removed, Feather can no longer:
+# - mark it as unread when its article file is restored from the trash;
+# - generate article files for read articles if html.write_read_articles = true.
+# If only_sync_unread_articles = false, this does nothing (since Feather always retrieves all read articles from the server).
+# Can be set through the environment variable SERVER_KEEP_READ_ARTICLES_FOR.
+keep_read_articles_for = 259200

 [directories]
 # Data directory: path where the internal Feather data will be stored.
@ -37,8 +45,7 @@ reader = "reader"

 [html]
 # If set to true, Feather will also generate articles files for read articles.
-# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing.
-# The the mark-as-unread behavior will also change depending on this value:
+# The mark-as-unread behavior will change depending on this value:
 # - if false, marking an article as unread requires its file to be recreated/restored from the trash;
 # - if true, marking an article as unread requires deleting its article file (same as mark-as-read).
 # Can be set through the environment variable HTML_WRITE_READ_ARTICLES.
--- a/src/feather/config.py
+++ b/src/feather/config.py
@ -61,6 +61,9 @@ class Config:
        self.only_sync_unread_articles: bool = bool(
            get_config("server", "only_sync_unread_articles")
        )
+        self.keep_read_articles_for: float = float(
+            get_config("server", "keep_read_articles_for")
+        )

        self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
        self.time_format: str = str(get_config("datetime", "format"))
--- a/src/feather/data.py
+++ b/src/feather/data.py
@ -16,7 +16,7 @@ from feather.config import Config
 def sanitize_filename(
    config: Config, filename: str, insert_before_suffix: str = ""
 ) -> str:
-    """Escape invalid caracters and truncate the filename as per the configuration.
+    """Escape invalid characters and truncate the filename as per the configuration.
    This operates on a single filename, not a path.
    (insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)."""
    filename = filename.translate(config.filename_translation)
@ -39,7 +39,7 @@ def sanitize_filename(


 def format_datetime(config: Config, timestamp: int) -> str:
-    """Format a timestamp according to the configuraiton."""
+    """Format a timestamp according to the configuration."""
    if timestamp < 0:
        date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta(
            seconds=timestamp
@ -116,6 +116,7 @@ class Article(ABC):
    comments_url: str = ""  # article comments URL
    language: str = ""  # article language
    image_url: str = ""  # article main image
+    last_write: int = 0  # last time this article file was written (timestamp)

    def _hash_id(self):
        return sha256(str(self.id).encode("utf-8")).hexdigest()
@ -197,6 +198,7 @@ class Article(ABC):
            "language",
            "image_url",
            "html_path",
+            "last_write",
        )
        article_json = {field: getattr(self, field) for field in stored_fields}
        article_json["category"] = self.category.asdict()
@ -254,6 +256,7 @@ class Article(ABC):

    def write(self, recompute_paths=False):
        """Write all the files associated with this article to disk."""
+        self.last_write = datetime.now().timestamp()
        if self.unread or self.config.write_read_articles:
            try:
                self._write_html(recompute_path=recompute_paths)