1
0
Fork 0
mirror of https://codeberg.org/Reuh/feather.git synced 2025-10-27 10:09:32 +00:00

feat: allow keeping read files for a configurable time in local storage

This commit is contained in:
Étienne Fildadut 2025-10-17 14:30:42 +02:00
parent b5474cb376
commit 22b2ea89d5
5 changed files with 30 additions and 10 deletions

View file

@ -6,6 +6,7 @@ from asyncio import Event
from typing import Iterable from typing import Iterable
from watchfiles import awatch from watchfiles import awatch
from pathlib import Path from pathlib import Path
from datetime import datetime
from feather.config import Config from feather.config import Config
from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId
@ -109,13 +110,10 @@ class FeatherApp:
to_mark_as_unread_id[i : i + config.articles_per_query], False to_mark_as_unread_id[i : i + config.articles_per_query], False
) )
# regenerate/delete local file with new read/unread state # regenerate local file with new read/unread state
for article in to_mark_as_read: for article in to_mark_as_read:
article.unread = False article.unread = False
if config.only_sync_unread_articles: article.regenerate()
article.delete()
else:
article.regenerate()
for article in to_mark_as_unread: for article in to_mark_as_unread:
article.unread = True article.unread = True
article.regenerate() article.regenerate()
@ -164,8 +162,16 @@ class FeatherApp:
# Remove articles that we didn't get from the server but are in the JSON directory # Remove articles that we didn't get from the server but are in the JSON directory
removed_articles = 0 removed_articles = 0
article_cutoff_timestamp = (
datetime.now().timestamp() - config.keep_read_articles_for
)
for article in self.iter_articles(): for article in self.iter_articles():
if article.id not in grabbed_article_paths: if (
# we sync all articles: remove all articles that aren't on the server
not config.only_sync_unread_articles
# we only sync unread: only remove articles that are too old
or article.last_write < article_cutoff_timestamp
) and article.id not in grabbed_article_paths:
article.delete() article.delete()
removed_articles += 1 removed_articles += 1

View file

@ -151,6 +151,7 @@ class GReaderArticle(Article):
## Tiny Tiny RSS API ## ## Tiny Tiny RSS API ##
# Monkey patch Headline.__init__ to skip timestamp to datetime conversion # Monkey patch Headline.__init__ to skip timestamp to datetime conversion
# Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime # Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime
def Headline_init(self, attr, client): def Headline_init(self, attr, client):

View file

@ -24,8 +24,16 @@ password = "password"
# Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST. # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST.
articles_per_request = 0 articles_per_request = 0
# Set to true to only sync unread articles; Feather will not retrieve any read article from the server. # Set to true to only sync unread articles; Feather will not retrieve any read article from the server.
# If set to false, Feather will download ALL articles from the server, read and unread, on each synchronization. This might be a lot of data depending on how many read articles your server keeps. If you only want to keep recent read articles, look at the keep_read_articles_for setting below.
# Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES. # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES.
only_sync_unread_articles = true only_sync_unread_articles = true
# How long in seconds to keep read articles in the local storage before deleting them.
# Once an article is removed, Feather can no longer:
# - mark it as unread when its article file is restored from the trash;
# - generate articles files for read articles if html.write_read_articles = true.
# If only_sync_unread_articles = false, this does nothing (since Feather always retrieves all read articles from the server).
# Can be set through the environment variable SERVER_KEEP_READ_ARTICLES_FOR.
keep_read_articles_for = 259200
[directories] [directories]
# Data directory: path where the internal Feather data will be stored. # Data directory: path where the internal Feather data will be stored.
@ -37,8 +45,7 @@ reader = "reader"
[html] [html]
# If set to true, Feather will also generate articles files for read articles. # If set to true, Feather will also generate articles files for read articles.
# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing. # The mark-as-unread behavior will change depending on this value:
# The the mark-as-unread behavior will also change depending on this value:
# - if false, marking an article as unread requires its file to be recreated/restored from the trash; # - if false, marking an article as unread requires its file to be recreated/restored from the trash;
# - if true, marking an article as unread requires deleting its article file (same as mark-as-read). # - if true, marking an article as unread requires deleting its article file (same as mark-as-read).
# Can be set through the environment variable HTML_WRITE_READ_ARTICLES. # Can be set through the environment variable HTML_WRITE_READ_ARTICLES.

View file

@ -61,6 +61,9 @@ class Config:
self.only_sync_unread_articles: bool = bool( self.only_sync_unread_articles: bool = bool(
get_config("server", "only_sync_unread_articles") get_config("server", "only_sync_unread_articles")
) )
self.keep_read_articles_for: float = float(
get_config("server", "keep_read_articles_for")
)
self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
self.time_format: str = str(get_config("datetime", "format")) self.time_format: str = str(get_config("datetime", "format"))

View file

@ -16,7 +16,7 @@ from feather.config import Config
def sanitize_filename( def sanitize_filename(
config: Config, filename: str, insert_before_suffix: str = "" config: Config, filename: str, insert_before_suffix: str = ""
) -> str: ) -> str:
"""Escape invalid caracters and truncate the filename as per the configuration. """Escape invalid characters and truncate the filename as per the configuration.
This operates on a single filename, not a path. This operates on a single filename, not a path.
(insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping).""" (insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)."""
filename = filename.translate(config.filename_translation) filename = filename.translate(config.filename_translation)
@ -39,7 +39,7 @@ def sanitize_filename(
def format_datetime(config: Config, timestamp: int) -> str: def format_datetime(config: Config, timestamp: int) -> str:
"""Format a timestamp according to the configuraiton.""" """Format a timestamp according to the configuration."""
if timestamp < 0: if timestamp < 0:
date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta( date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta(
seconds=timestamp seconds=timestamp
@ -116,6 +116,7 @@ class Article(ABC):
comments_url: str = "" # article comments URL comments_url: str = "" # article comments URL
language: str = "" # article language language: str = "" # article language
image_url: str = "" # article main image image_url: str = "" # article main image
last_write: int = 0 # last time this article file was written (timestamp)
def _hash_id(self): def _hash_id(self):
return sha256(str(self.id).encode("utf-8")).hexdigest() return sha256(str(self.id).encode("utf-8")).hexdigest()
@ -197,6 +198,7 @@ class Article(ABC):
"language", "language",
"image_url", "image_url",
"html_path", "html_path",
"last_write",
) )
article_json = {field: getattr(self, field) for field in stored_fields} article_json = {field: getattr(self, field) for field in stored_fields}
article_json["category"] = self.category.asdict() article_json["category"] = self.category.asdict()
@ -254,6 +256,7 @@ class Article(ABC):
def write(self, recompute_paths=False): def write(self, recompute_paths=False):
"""Write all the files associated with this article to disk.""" """Write all the files associated with this article to disk."""
self.last_write = datetime.now().timestamp()
if self.unread or self.config.write_read_articles: if self.unread or self.config.write_read_articles:
try: try:
self._write_html(recompute_path=recompute_paths) self._write_html(recompute_path=recompute_paths)