From 48c2c0f85055506031780802d47c82c22a3580f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 13:57:08 +0200 Subject: [PATCH 1/7] fix: handle negative timestamps --- src/feather/client.py | 18 +++++++++++++----- src/feather/data.py | 18 +++++++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/feather/client.py b/src/feather/client.py index 66c9bba..7940821 100644 --- a/src/feather/client.py +++ b/src/feather/client.py @@ -4,7 +4,7 @@ from __future__ import annotations import re from abc import ABC, abstractmethod -from ttrss.client import TTRClient +from ttrss.client import TTRClient, Headline import google_reader from feather.config import Config @@ -146,11 +146,19 @@ class GReaderArticle(Article): # several API references I've seen didn't mention canonical, but alternate seems to also be the article link (?) and should be an ok fallback self.article_url = item_content.alternate[0].href - self._compute_json_path() + self.json_path = self._get_json_path() ## Tiny Tiny RSS API ## +# Monkey patch Headline.__init__ to skip timestamp to datetime conversion +# Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime +def Headline_init(self, attr, client): + super(Headline, self).__init__(attr, client) + + +Headline.__init__ = Headline_init + class TTRSession(ClientSession): """Tiny Tiny RSS API client""" @@ -234,8 +242,8 @@ class TTRArticle(Article): self.unread = article.unread self.title = article.title - self.published = article.updated.timestamp() - self.updated = article.updated.timestamp() + self.published = article.updated + self.updated = article.updated self.author = article.author self.summary = article.excerpt self.content = article.content @@ -248,4 +256,4 @@ class TTRArticle(Article): self.language = article.lang self.image_url = article.flavor_image - 
self._compute_json_path() + self.json_path = self._get_json_path() diff --git a/src/feather/data.py b/src/feather/data.py index b9a84ca..09738e4 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -5,7 +5,7 @@ from __future__ import annotations import os import json from abc import ABC -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path from hashlib import sha256 from tempfile import NamedTemporaryFile @@ -40,9 +40,13 @@ def sanitize_filename( def format_datetime(config: Config, timestamp: int) -> str: """Format a timestamp according to the configuraiton.""" - return datetime.fromtimestamp(timestamp, config.timezone).strftime( - config.time_format - ) + if timestamp < 0: + date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta( + seconds=timestamp + ) + else: + date = datetime.fromtimestamp(timestamp, config.timezone) + return date.strftime(config.time_format) def atomic_write(path: Path, content: str): @@ -168,13 +172,13 @@ class Article(ABC): d["category"] = self.category.asdict() return d - def _compute_json_path(self): - self.json_path = self.config.json_root / f"{self._hash_id()}.json" + def _get_json_path(self) -> Path: + return self.config.json_root / f"{self._hash_id()}.json" def _write_json(self, recompute_path=False): """Write the JSON file associated with this article. 
Error if it already exists.""" if recompute_path: - self._compute_json_path() + self.json_path = self._get_json_path() stored_fields = ( "id", "unread", From b5474cb3762929f1b57948aebde51ed0031b8073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 13:59:00 +0200 Subject: [PATCH 2/7] feat: allow marking items as unread by restoring their files and add option to not write read articles even when synced --- src/feather/app.py | 18 +++++++++++------- src/feather/config.default.toml | 9 ++++++++- src/feather/config.py | 1 + src/feather/data.py | 22 +++++++++++++--------- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/feather/app.py b/src/feather/app.py index 4db3319..27f90d7 100755 --- a/src/feather/app.py +++ b/src/feather/app.py @@ -82,13 +82,16 @@ class FeatherApp: to_mark_as_read = [] to_mark_as_unread = [] for article in self.iter_articles(): - if not article.has_html(): - if article.unread: - to_mark_as_read.append(article) - marked_as_read += 1 - else: - to_mark_as_unread.append(article) - marked_as_unread += 1 + has_html = article.has_html() + if article.unread and not has_html: + to_mark_as_read.append(article) + marked_as_read += 1 + elif not article.unread and ( + (config.write_read_articles and not has_html) + or (not config.write_read_articles and has_html) + ): + to_mark_as_unread.append(article) + marked_as_unread += 1 if len(to_mark_as_read) == len(to_mark_as_unread) == 0: return # nothing to do @@ -260,6 +263,7 @@ class FeatherApp: """Regenerate all local files using local data only""" for article in self.iter_articles(): article.regenerate() + self.remove_empty_categories() def clear_data(self): """Delete all local data""" diff --git a/src/feather/config.default.toml b/src/feather/config.default.toml index f5860c8..ce3629b 100644 --- a/src/feather/config.default.toml +++ b/src/feather/config.default.toml @@ -23,7 +23,7 @@ password = "password" # Set to 0 to let Feather choose (200 for 
ttrss, 1000 for googlereader). # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST. articles_per_request = 0 -# Set to true to only sync unread articles; Feather will not retrieve or store any read article. +# Set to true to only sync unread articles; Feather will not retrieve any read article from the server. # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES. only_sync_unread_articles = true @@ -36,6 +36,13 @@ data = "data" reader = "reader" [html] +# If set to true, Feather will also generate articles files for read articles. +# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing. +# The the mark-as-unread behavior will also change depending on this value: +# - if false, marking an article as unread requires its file to be recreated/restored from the trash; +# - if true, marking an article as unread requires deleting its article file (same as mark-as-read). +# Can be set through the environment variable HTML_WRITE_READ_ARTICLES. +write_read_articles = false # Template used for generating article HTML files. All templates are Jinja2 templates. 
# Available fields: # - id: article id (int | str) diff --git a/src/feather/config.py b/src/feather/config.py index b61508b..7f72f09 100644 --- a/src/feather/config.py +++ b/src/feather/config.py @@ -65,6 +65,7 @@ class Config: self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.time_format: str = str(get_config("datetime", "format")) + self.write_read_articles: bool = bool(get_config("html", "write_read_articles")) self.article_template: Template = Template( str(get_config("html", "article_template")), autoescape=True ) diff --git a/src/feather/data.py b/src/feather/data.py index 09738e4..958f45e 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -234,8 +234,11 @@ class Article(ABC): def _delete_html(self, missing_ok=False): """Delete the HTML file associated with this article.""" # Delete a HTML file for a JSON object - html_path = self.config.html_root / self.html_path - html_path.unlink(missing_ok=missing_ok) + if self.html_path is None: + return + else: + html_path = self.config.html_root / self.html_path + html_path.unlink(missing_ok=missing_ok) def has_html(self) -> bool: """Check if the HTML file associated with the article exists on disk.""" @@ -251,13 +254,14 @@ class Article(ABC): def write(self, recompute_paths=False): """Write all the files associated with this article to disk.""" - try: - self._write_html(recompute_path=recompute_paths) - except FileExistsError: - raise - except: - self._delete_html(missing_ok=True) - raise + if self.unread or self.config.write_read_articles: + try: + self._write_html(recompute_path=recompute_paths) + except FileExistsError: + raise + except: + self._delete_html(missing_ok=True) + raise try: self._write_json(recompute_path=recompute_paths) except FileExistsError: From 22b2ea89d54cc43c4defddf2a59e95574746bd78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 14:30:42 +0200 Subject: [PATCH 3/7] feat: allow keeping read files for a 
configurable time in local storage --- src/feather/app.py | 18 ++++++++++++------ src/feather/client.py | 1 + src/feather/config.default.toml | 11 +++++++++-- src/feather/config.py | 3 +++ src/feather/data.py | 7 +++++-- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/feather/app.py b/src/feather/app.py index 27f90d7..3ef5697 100755 --- a/src/feather/app.py +++ b/src/feather/app.py @@ -6,6 +6,7 @@ from asyncio import Event from typing import Iterable from watchfiles import awatch from pathlib import Path +from datetime import datetime from feather.config import Config from feather.client import GReaderSession, TTRSession, ClientSession, Article, ArticleId @@ -109,13 +110,10 @@ class FeatherApp: to_mark_as_unread_id[i : i + config.articles_per_query], False ) - # regenerate/delete local file with new read/unread state + # regenerate local file with new read/unread state for article in to_mark_as_read: article.unread = False - if config.only_sync_unread_articles: - article.delete() - else: - article.regenerate() + article.regenerate() for article in to_mark_as_unread: article.unread = True article.regenerate() @@ -164,8 +162,16 @@ class FeatherApp: # Remove articles that we didn't get from the server but are in the JSON directory removed_articles = 0 + article_cutoff_timestamp = ( + datetime.now().timestamp() - config.keep_read_articles_for + ) for article in self.iter_articles(): - if article.id not in grabbed_article_paths: + if ( + # we sync all articles: remove all articles that aren't on the server + not config.only_sync_unread_articles + # we only sync unread: only remove articles that are too old + or article.last_write < article_cutoff_timestamp + ) and article.id not in grabbed_article_paths: article.delete() removed_articles += 1 diff --git a/src/feather/client.py b/src/feather/client.py index 7940821..e5ca7b7 100644 --- a/src/feather/client.py +++ b/src/feather/client.py @@ -151,6 +151,7 @@ class GReaderArticle(Article): ## Tiny Tiny 
RSS API ## + # Monkey patch Headline.__init__ to skip timestamp to datetime conversion # Articles may have a negative timestamp and Python's datetime.fromtimestamp doesn't like that, so instead we keep the timestamp and deal with the issue in data.py/format_datetime def Headline_init(self, attr, client): diff --git a/src/feather/config.default.toml b/src/feather/config.default.toml index ce3629b..1b12c4a 100644 --- a/src/feather/config.default.toml +++ b/src/feather/config.default.toml @@ -24,8 +24,16 @@ password = "password" # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST. articles_per_request = 0 # Set to true to only sync unread articles; Feather will not retrieve any read article from the server. +# If set to false, Feather will download ALL articles from the server, read and unread, on each synchronization. This might be a lot of data depending on how many read articles your server keeps. If you only want to keep recent read articles, look at the keep_read_articles_for settings below. # Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES. only_sync_unread_articles = true +# How long in seconds to keep read articles in the local storage before deleting them. +# Once an article is removed, Feather can no longer: +# - mark it as unread when its article file is restored from the trash; +# - generate articles files for read articles if html.write_read_articles = true. +# If only_sync_unread_articles = false, this does nothing (since Feather always retrieve all read articles from the server). +# Can be set through the environment variable SERVER_KEEP_READ_ARTICLES_FOR. +keep_read_articles_for = 259200 [directories] # Data directory: path where the internal Feather data will be stored. @@ -37,8 +45,7 @@ reader = "reader" [html] # If set to true, Feather will also generate articles files for read articles. -# Also remember to set server.only_sync_unread_articles = false; otherwise this will do nothing. 
-# The the mark-as-unread behavior will also change depending on this value: +# The mark-as-unread behavior will change depending on this value: # - if false, marking an article as unread requires its file to be recreated/restored from the trash; # - if true, marking an article as unread requires deleting its article file (same as mark-as-read). # Can be set through the environment variable HTML_WRITE_READ_ARTICLES. diff --git a/src/feather/config.py b/src/feather/config.py index 7f72f09..98d10f2 100644 --- a/src/feather/config.py +++ b/src/feather/config.py @@ -61,6 +61,9 @@ class Config: self.only_sync_unread_articles: bool = bool( get_config("server", "only_sync_unread_articles") ) + self.keep_read_articles_for: float = float( + get_config("server", "keep_read_articles_for") + ) self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.time_format: str = str(get_config("datetime", "format")) diff --git a/src/feather/data.py b/src/feather/data.py index 958f45e..a6af71f 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -16,7 +16,7 @@ from feather.config import Config def sanitize_filename( config: Config, filename: str, insert_before_suffix: str = "" ) -> str: - """Escape invalid caracters and truncate the filename as per the configuration. + """Escape invalid characters and truncate the filename as per the configuration. This operates on a single filename, not a path. 
(insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping).""" filename = filename.translate(config.filename_translation) @@ -39,7 +39,7 @@ def sanitize_filename( def format_datetime(config: Config, timestamp: int) -> str: - """Format a timestamp according to the configuraiton.""" + """Format a timestamp according to the configuration.""" if timestamp < 0: date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta( seconds=timestamp @@ -116,6 +116,7 @@ class Article(ABC): comments_url: str = "" # article comments URL language: str = "" # article language image_url: str = "" # article main image + last_write: int = 0 # last time this article file was written (timestamp) def _hash_id(self): return sha256(str(self.id).encode("utf-8")).hexdigest() @@ -197,6 +198,7 @@ class Article(ABC): "language", "image_url", "html_path", + "last_write", ) article_json = {field: getattr(self, field) for field in stored_fields} article_json["category"] = self.category.asdict() @@ -254,6 +256,7 @@ class Article(ABC): def write(self, recompute_paths=False): """Write all the files associated with this article to disk.""" + self.last_write = datetime.now().timestamp() if self.unread or self.config.write_read_articles: try: self._write_html(recompute_path=recompute_paths) From aad1197d5c9351a1fe9891385d832fb3f58b0f32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 14:38:51 +0200 Subject: [PATCH 4/7] fix: timestamps are floats --- src/feather/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/feather/data.py b/src/feather/data.py index a6af71f..4117545 100644 --- a/src/feather/data.py +++ b/src/feather/data.py @@ -38,7 +38,7 @@ def sanitize_filename( return filename[:cutoff] + "…" + insert_before_suffix + suffix -def format_datetime(config: Config, timestamp: int) -> str: +def format_datetime(config: Config, timestamp: float) -> str: """Format a timestamp 
according to the configuration.""" if timestamp < 0: date = datetime(1970, 1, 1, tzinfo=config.timezone) + timedelta( @@ -103,8 +103,8 @@ class Article(ABC): # with default value unread: bool = True # if the article is unread title: str = "" # article title - published: int = 0 # article publication time (timestamp) - updated: int = 0 # article update time (timestamp) + published: float = 0.0 # article publication time (timestamp) + updated: float = 0.0 # article update time (timestamp) author: str = "" # article author summary: str = "" # article summary (HTML) content: str = "" # article content (HTML) @@ -116,7 +116,7 @@ class Article(ABC): comments_url: str = "" # article comments URL language: str = "" # article language image_url: str = "" # article main image - last_write: int = 0 # last time this article file was written (timestamp) + last_write: float = 0.0 # last time this article file was written (timestamp) def _hash_id(self): return sha256(str(self.id).encode("utf-8")).hexdigest() @@ -231,7 +231,7 @@ class Article(ABC): html_path, config.article_template.render(self._get_template_dict()) ) # set accessed date to update time, modified to publication time - os.utime(html_path, (max(self.published, self.updated), self.published)) + os.utime(html_path, (max(int(self.published), int(self.updated)), int(self.published))) def _delete_html(self, missing_ok=False): """Delete the HTML file associated with this article.""" From 89ab525c9d64beded2945f21402e184bcdc2f409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 14:55:56 +0200 Subject: [PATCH 5/7] docs: update README --- README.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index dc0388c..ead8668 100644 --- a/README.md +++ b/README.md @@ -43,27 +43,41 @@ Tip: if you have nested categories, search "html" to list all the articles in th ### Marking articles as read -Deleting an article will 
toggle their read status (will take effect on the next synchronization to the server). +Deleting an article file will mark it as read (will take effect on the next synchronization to the server). ![Marking an article as read by deleting the article file](images/markasread.gif) #### Handling read articles -The now read articles can (surprisingly) be found in the trash. If you're fast and restore them before the next synchronization, it's be as if nothing happened. However, restoring the file after synchronization will otherwise not work and the article won't be marked as unread. +The now read articles can (surprisingly) be found in the trash. After marking an article as read, there is a grace period (by default 3 days) during which you can mark read articles as unread again by restoring their files from the trash. -Instead, if you want Feather to also track read articles, you could add to your configuration file: +#### Reading read articles + +If you want to re-read your favorites articles directly in the Feather reader director, you can configure Feather to write articles files for read articles too: ```toml -# Grab both read and unread articles into the local directory -server.only_sync_unread_articles = false +# Write article HTML files for read articles +[html] +write_read_articles = true # Add a checkmark in the article filename indicating the read status -html.filename_template = "{% if unread %}☐{% else %}☑{% endif %} [{{ feed_title }}]\t{{ title }} ({{ published }}).html" +[html] +filename_template = "{% if unread %}☐{% else %}☑{% endif %} [{{ feed_title }}]\t{{ title }} ({{ published }}).html" ``` -Now both read and unread articles will be stored in the Feather reader directory, and if you delete a read article file, the article will be marked as unread (and the deleted file will be recreated during the next synchronization, but marked as unread). 
+Now both read and unread articles will be stored in the Feather reader directory, and after marking an article file as read by deleting it, Feather will regenerate the file on the next synchronization (but marked as read this time). + +Note that this also changes the mark-as-unread behavior: since it is no longer possible to restore from the trash because the file is automatically recreated, marking an item as unread is done in the same way as mark-as-read, i.e. by deleting the file of a read article. ![Marking an article as unread by deleting the article file](images/markunread.gif) +By default, Feather will only grab unread articles from the server, so the read articles you have access to locally are only the articles kept for the 3 days grace period after marking them as read (see the [handling read articles chapter](#handling-read-articles)). If you want to have access to _all_ articles from the server, you can add to your configuration: + +```toml +# Grab both read and unread articles from the server +[server] +only_sync_unread_articles = false +``` + ### Synchronizing with the server Run `feather sync` to synchronize all local data with the server. 
The synchronization is done in two parts, which you can perform separately using: From 9ae72daa115a8c866f2d9e289c73ef18dc9640ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 14:56:24 +0200 Subject: [PATCH 6/7] feat: bump version to v1.1.0 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 703083d..e79f8f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "feather" -version = "1.0.0" +version = "1.1.0" authors = [ { name = 'Étienne "Reuh" Fildadut' } ] description = "file-based RSS reader client" readme = "README.md" diff --git a/uv.lock b/uv.lock index 3b4f1f3..a68fce4 100644 --- a/uv.lock +++ b/uv.lock @@ -69,7 +69,7 @@ wheels = [ [[package]] name = "feather" -version = "1.0.0" +version = "1.1.0" source = { editable = "." } dependencies = [ { name = "google-reader" }, From d77a92cb829925110f7fada1da4ddc9e40f581fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 17 Oct 2025 15:03:24 +0200 Subject: [PATCH 7/7] docs: copy-paste mishaps in README.md --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ead8668..88dba43 100644 --- a/README.md +++ b/README.md @@ -53,14 +53,13 @@ The now read articles can (surprisingly) be found in the trash. 
After marking an #### Reading read articles -If you want to re-read your favorites articles directly in the Feather reader director, you can configure Feather to write articles files for read articles too: +If you want to re-read your favorite articles directly in the Feather reader directory, you can configure Feather to write article files for read articles too: ```toml -# Write article HTML files for read articles [html] +# Write article HTML files for read articles write_read_articles = true # Add a checkmark in the article filename indicating the read status -[html] filename_template = "{% if unread %}☐{% else %}☑{% endif %} [{{ feed_title }}]\t{{ title }} ({{ published }}).html" ``` @@ -73,8 +72,8 @@ Note that this also change the mark-as-unread behavior: since it is no longer po By default, Feather will only grab unread articles from the server, so the read articles you have access to locally are only the articles kept for the 3 days grace period after marking them as read (see the [handling read articles chapter](#handling-read-articles)). If you want to have access to _all_ articles from the server, you can add to your configuration: ```toml -# Grab both read and unread articles from the server [server] +# Grab both read and unread articles from the server only_sync_unread_articles = false ```