feat: allow keeping read files for a configurable time in local storage

2025-10-27 10:09:32 +00:00 · 2025-10-17 14:30:42 +02:00 · 2025-10-17 14:30:42 +02:00 · 22b2ea89d5
commit 22b2ea89d5
parent b5474cb376
5 changed files with 30 additions and 10 deletions
--- a/src/feather/app.py
+++ b/src/feather/app.py
@ -6,6 +6,7 @@ from asyncio import Event
 from typing import Iterable
 from watchfiles import awatch
 from pathlib import Path
 from datetime import datetime
 from feather.config import Config
 from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId
@ -109,13 +110,10 @@ class FeatherApp:
                to_mark_as_unread_id[i : i + config.articles_per_query], False
            )
-        # regenerate/delete local file with new read/unread state
+        # regenerate local file with new read/unread state
        for article in to_mark_as_read:
            article.unread = False
-            if config.only_sync_unread_articles:
+            article.regenerate()
                article.delete()
            else:
                article.regenerate()
        for article in to_mark_as_unread:
            article.unread = True
            article.regenerate()
@ -164,8 +162,16 @@ class FeatherApp:
        # Remove articles that we didn't get from the server but are in the JSON directory
        removed_articles = 0
        article_cutoff_timestamp = (
            datetime.now().timestamp() - config.keep_read_articles_for
        )
        for article in self.iter_articles():
-            if article.id not in grabbed_article_paths:
+            if (
                # we sync all articles: remove all articles that aren't on the server
                not config.only_sync_unread_articles
                # we only sync unread: only remove articles that are too old
                or article.last_write < article_cutoff_timestamp
            ) and article.id not in grabbed_article_paths:
                article.delete()
                removed_articles += 1
--- a/src/feather/client.py
+++ b/src/feather/client.py
@ -151,6 +151,7 @@ class GReaderArticle(Article):
 ## Tiny Tiny RSS API ##
 # Monkey patch Headline.__init__ to skip timestamp to datetime conversion
 # Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime
 def Headline_init(self, attr, client):
--- a/src/feather/config.default.toml
+++ b/src/feather/config.default.toml
@ -24,8 +24,16 @@ password = "password"
 # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST.
 articles_per_request = 0
 # Set to true to only sync unread articles; Feather will not retrieve any read article from the server.
 # If set to false, Feather will download ALL articles from the server, read and unread, on each synchronization. This might be a lot of data depending on how many read articles your server keeps. If you only want to keep recent read articles, look at the keep_read_articles_for setting below.
 # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES.
 only_sync_unread_articles = true
 # How long in seconds to keep read articles in the local storage before deleting them.
 # Once an article is removed, Feather can no longer:
 # - mark it as unread when its article file is restored from the trash;
 # - generate articles files for read articles if html.write_read_articles = true.
 # If only_sync_unread_articles = false, this does nothing (since Feather always retrieves all read articles from the server).
 # Can be set through the environment variable SERVER_KEEP_READ_ARTICLES_FOR.
 keep_read_articles_for = 259200
 [directories]
 # Data directory: path where the internal Feather data will be stored.
@ -37,8 +45,7 @@ reader = "reader"
 [html]
 # If set to true, Feather will also generate articles files for read articles.
-# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing.
+# The mark-as-unread behavior will change depending on this value:
 # The mark-as-unread behavior will also change depending on this value:
 # - if false, marking an article as unread requires its file to be recreated/restored from the trash;
 # - if true, marking an article as unread requires deleting its article file (same as mark-as-read).
 # Can be set through the environment variable HTML_WRITE_READ_ARTICLES.
--- a/src/feather/config.py
+++ b/src/feather/config.py
@ -61,6 +61,9 @@ class Config:
        self.only_sync_unread_articles: bool = bool(
            get_config("server", "only_sync_unread_articles")
        )
        self.keep_read_articles_for: float = float(
            get_config("server", "keep_read_articles_for")
        )
        self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
        self.time_format: str = str(get_config("datetime", "format"))
--- a/src/feather/data.py
+++ b/src/feather/data.py
@ -16,7 +16,7 @@ from feather.config import Config
 def sanitize_filename(
    config: Config, filename: str, insert_before_suffix: str = ""
 ) -> str:
-    """Escape invalid caracters and truncate the filename as per the configuration.
+    """Escape invalid characters and truncate the filename as per the configuration.
    This operates on a single filename, not a path.
    (insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)."""
    filename = filename.translate(config.filename_translation)
@ -39,7 +39,7 @@ def sanitize_filename(
 def format_datetime(config: Config, timestamp: int) -> str:
-    """Format a timestamp according to the configuraiton."""
+    """Format a timestamp according to the configuration."""
    if timestamp < 0:
        date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta(
            seconds=timestamp
@ -116,6 +116,7 @@ class Article(ABC):
    comments_url: str = ""  # article comments URL
    language: str = ""  # article language
    image_url: str = ""  # article main image
    last_write: int = 0  # last time this article file was written (timestamp)
    def _hash_id(self):
        return sha256(str(self.id).encode("utf-8")).hexdigest()
@ -197,6 +198,7 @@ class Article(ABC):
            "language",
            "image_url",
            "html_path",
            "last_write",
        )
        article_json = {field: getattr(self, field) for field in stored_fields}
        article_json["category"] = self.category.asdict()
@ -254,6 +256,7 @@ class Article(ABC):
    def write(self, recompute_paths=False):
        """Write all the files associated with this article to disk."""
        self.last_write = datetime.now().timestamp()
        if self.unread or self.config.write_read_articles:
            try:
                self._write_html(recompute_path=recompute_paths)