From b100d8f0b861c29d25fe55b8d2735b235a89a722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Fri, 10 Oct 2025 16:47:20 +0200 Subject: [PATCH] feat: add support for tt-rss api --- README.md | 1 + config.default.toml | 27 ++++- feather.py | 286 ++++++++++++++++++++++++++++++++++---------- pyproject.toml | 1 + uv.lock | 11 ++ 5 files changed, 256 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 036cae4..c33bbe7 100644 --- a/README.md +++ b/README.md @@ -70,4 +70,5 @@ You need Python 3.12 or newer. Then pip it up. - [ ] Actually think about the issues created by the duplicate warning - [x] Set generated files creation/modification date instead of putting date in filename - [ ] Make a proper Python package +- [ ] Attachments diff --git a/config.default.toml b/config.default.toml index 68667f8..12fd3ab 100644 --- a/config.default.toml +++ b/config.default.toml @@ -1,4 +1,5 @@ [server] +api = "googlereader" # (Required) URL of your server's Google Reader API endpoint # Can be set through the environment variable SERVER_URL. url = "https://rss.example.com/" @@ -37,19 +38,35 @@ template = '''

{{ published_formatted }} - {{ origin_title }} + + {% if feed_url %} + + {% endif %} + {% if feed_icon_url %} + + {% endif %} + {{ feed_title }} + {% if feed_url %} + + {% endif %} +

-

{{ title }}

+

{{ title }}

{{ author }}

-
{{ summary | safe }}
-
{{ content | safe }}
+ {% if content %} +
{{ content | safe }}
+ {% else %} + +
{{ summary | safe }}
+ {% endif %}
''' # Filename template for generated HTML files. # Can be set through the environment variable HTML_FILENAME_TEMPLATE. -filename_template = "[{{ origin_title }}]\t{{ title }} ({{ published_formatted }}).html" +filename_template = "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html" +category_template = "{{ title }}" # Maximum allowed filename length (in bytes assuming UTF-8 encoding) before truncating. Depending on your filesystem filename's limits it may be possible to increase the value, ask Wikipedia for details. # Can be set through the environment variable HTML_MAX_FILENAME_LENGTH. max_filename_length = 250 diff --git a/feather.py b/feather.py index 80ee18e..d026cf7 100755 --- a/feather.py +++ b/feather.py @@ -1,4 +1,5 @@ #!/usr/bin/python3 +from __future__ import annotations import os import re @@ -8,11 +9,13 @@ import sys import argparse import asyncio import signal +from abc import ABC, abstractmethod from datetime import datetime from zoneinfo import ZoneInfo from pathlib import Path from hashlib import sha256 from jinja2 import Template +from ttrss.client import TTRClient import google_reader @@ -49,16 +52,22 @@ class Config: # Get config fields self.html_root: Path = Path(get_config("directories", "reader")) self.json_root: Path = Path(get_config("directories", "data")) + + self.server_api: str = str(get_config("server", "api")) self.server_url: str = str(get_config("server", "url", False)) self.server_user: str = str(get_config("server", "user", False)) self.server_password: str = str(get_config("server", "password", False)) self.items_per_query: int = int(get_config("server", "items_per_request")) + self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.time_format: str = str(get_config("datetime", "format")) + self.item_template: Template = Template(str(get_config("html", "template")), autoescape=True) self.item_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False) + self.item_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False) self.max_filename_length: int = int(get_config("html", "max_filename_length")) self.filename_translation = str.maketrans(get_config("html", "filename_replacement")) + self.daemon_sync_up_every: int = int(get_config("daemon", "sync_up_every")) self.daemon_sync_down_every: int = int(get_config("daemon", "sync_down_every")) @@ -71,34 +80,192 @@ class Config: #%% Interaction with server +type Id = int | str + +class Article(ABC): + id: Id + title: str = "" + published: int = 0 + updated: int = 0 + author: str = "" + summary: str = "" + content: str = "" + feed_title: str = "" + feed_url: str = "" + feed_icon_url: str = "" + feed_order: int = 0 + article_url: str = "" + comments_url: str = "" + language: str = "" + image_url: str = "" + + def asdict(self): + return { + "id": self.id, + "title": self.title, + "published": self.published, + "updated": self.updated, + "author": self.author, + "summary": self.summary, + "content": self.content, + "feed_title": self.feed_title, + "feed_url": self.feed_url, + "feed_icon_url": self.feed_icon_url, + "feed_order": self.feed_order, + "article_url": self.article_url, + "comments_url": self.comments_url, + "language": self.language, + "image_url": self.image_url, + } + +class GReaderArticle(Article): + def __init__(self, session: GReaderSession, item_content): + self.id = item_content.id + self.title = item_content.title + self.published = item_content.published + self.updated = item_content.updated + self.author = item_content.author + self.summary = item_content.summary.content + self.content = item_content.content.content + self.feed_title = item_content.origin.title + self.feed_url = item_content.origin.html_url + self.article_url = item_content.canonical[0].href + +class TTRArticle(Article): + def __init__(self, session: TRRSession, article): + self.id = article.id + self.title = article.title + self.published = article.updated.timestamp() + self.updated = article.updated.timestamp() + self.author = article.author + self.summary = article.excerpt + self.content = article.content + self.feed_title = article.feed_title + self.feed_url = article.site_url + self.feed_icon_url = session.feeds[article.feed_id]["icon"] + self.feed_order = session.feeds[article.feed_id]["order"] + self.article_url = article.link + self.comments_url = article.comments_link + self.language = article.lang + self.image_url = article.flavor_image + +class Category: + id: Id + title: str + parents: list[Category] + order: int = 0 + + def __init__(self, id, title, parents=[], order=0): + self.id = id + self.title = title + self.parents = parents + self.order = order + + def asdict(self): + return { + "id": self.id, + "title": self.title, + "parents": [ dir.asdict() for dir in self.parents ], + "order": self.order + } + +class ClientSession(ABC): + @abstractmethod + def mark_as_read(self, item_ids: list[Id]): + """ + Mark all the given articles as read. + """ + pass + + @abstractmethod + def list_folders(self) -> list[Category]: + """ + Returns a list of all the categories on the server. + """ + pass + + @abstractmethod + def get_unread_articles_in_folder(self, folder_id: Id, limit: int, continuation: int=0) -> list[Article]: + """ + Returns a list of Articles in the given category. limit and continuation are required for pagination. + """ + pass + label_name = re.compile("user/.*/label/(.*)") -class ClientSession: - client: google_reader.Client +class GReaderSession(ClientSession): + greader: google_reader.Client auth_token: str csrf_token: str def __init__(self, config: Config): - self.client = google_reader.Client(config.server_url) - self.auth_token = self.client.login(config.server_user, config.server_password) - self.csrf_token = self.client.get_token(self.auth_token) + self.greader = google_reader.Client(config.server_url) + self.auth_token = self.greader.login(config.server_user, config.server_password) + self.csrf_token = self.greader.get_token(self.auth_token) - def mark_as_read(self, item_ids): - self.client.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ]) + def mark_as_read(self, item_ids: list[Id]): + self.greader.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ]) def list_folders(self): - folders = [tag for tag in self.client.list_tags(self.auth_token) if tag.type == "folder"] + folders = [tag for tag in self.greader.list_tags(self.auth_token) if tag.type == "folder"] l = [] for folder in folders: folder_name = folder.label or label_name.search(folder.id).group(1) folder_id = folder.id - l.append((folder_name, folder_id)) + l.append(Category(id=folder_id, title=folder_name)) return l - def get_stream_items_ids(self, *args, **kwargs): - return self.client.get_stream_items_ids(self.auth_token, *args, **kwargs) + def get_unread_articles_in_folder(self, folder_id, limit=500, continuation=0): + items_ids = self.greader.get_stream_items_ids(self.auth_token, stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=limit, continuation=continuation) + item_contents = self.greader.get_stream_items_contents(self.auth_token, self.csrf_token, item_ids=[item.id for item in items.item_refs]) + return [ GReaderArticle(self, item_content) for item_content in item_contents.items ] + +class TRRSession(ClientSession): + ttrss: TTRClient + feeds: dict + + def __init__(self, config: Config): + self.ttrss = TTRClient(config.server_url, config.server_user, config.server_password, auto_login=True) + self.ttrss.login() + self.feeds = {} - def get_stream_items_contents(self, *args, **kwargs): - return self.client.get_stream_items_contents(self.auth_token, self.csrf_token, *args, **kwargs) + def mark_as_read(self, item_ids): + self.ttrss.mark_read(item_ids) + + def list_folders(self): + self.feeds = {} + def get_categories_recursive(parent_category, parent_categories=[]): + categories = [] + for i in range(len(parent_category["items"])): + item = parent_category["items"][i] + # skip special categories and feeds + if item["bare_id"] <= 0: + continue + # category + elif item.get("type") == "category": + category = Category(id=item["bare_id"], parents=parent_categories, title=item["name"], order=i) + categories.append(category) + categories += get_categories_recursive(item, parent_categories+[category]) + # feeds + elif "type" not in item: + self.feeds[item["bare_id"]] = item + self.feeds[item["bare_id"]]["order"] = i + return categories + tree = self.ttrss.get_feed_tree() + return get_categories_recursive(tree["categories"]) + + def get_unread_articles_in_folder(self, folder_id, limit=100, continuation=0): + headlines = self.ttrss.get_headlines(feed_id=folder_id, limit=limit, skip=continuation, is_cat=True, show_excerpt=True, show_content=True, view_mode="unread", include_attachments=True, include_nested=False) + return [ TTRArticle(self, headline) for headline in headlines ] + +def make_client_session(config: Config): + api = config.server_api + if api == "googlereader": + return GReaderSession(config) + elif api == "ttrss": + return TRRSession(config) + else: + print(f"Configuration error: server.api must be either ttrss or googlereader", file=sys.stderr) + exit(1) #%% Regular feather operations @@ -139,8 +306,11 @@ def truncate_filename(config, filename): return filename[:cutoff] + '…' + suffix def get_html_path(config, item_json): - folder_directory = config.html_root / escape_filename(config, item_json["folder"]) - folder_directory.mkdir(exist_ok=True) + folder_directory = config.html_root + for folder in item_json["folder"]["parents"]: + folder_directory /= escape_filename(config, config.item_category_template.render(folder)) + folder_directory /= escape_filename(config, config.item_category_template.render(item_json["folder"])) + folder_directory.mkdir(parents=True, exist_ok=True) # TODO move html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(item_json))) @@ -163,56 +333,42 @@ def synchronize_with_server(config, client_session): grabbed_item_paths = [] folders = client_session.list_folders() - for (folder_name, folder_id) in folders: - print(f" Updating folder {folder_name}") + for category in folders: + folder_path, folder_id = category.title, category.id + print(f" Updating folder {folder_path}") # TODO fixme - def process(item_ids): - nonlocal new_items, updated_items, grabbed_item_paths - if len(item_ids) > 0: - item_contents = client_session.get_stream_items_contents(item_ids=item_ids) - for item_content in item_contents.items: - item_json = { - "id": item_content.id, - "folder": folder_name, - "title": item_content.title, - "published": item_content.published, - "updated": item_content.updated, - "author": item_content.author, - "summary": item_content.summary.content, - "content": item_content.content.content, - "origin_title": item_content.origin.title, - "origin_url": item_content.origin.html_url, - "canonical_url": item_content.canonical[0].href, - } - set_computed_fields_json(config, item_json) + remaining, continuation = True, 0 + while remaining: + articles = client_session.get_unread_articles_in_folder(folder_id, limit=config.items_per_query, continuation=continuation) + if len(articles) >= config.items_per_query: + continuation += len(articles) + else: + remaining = False + + for item in articles: + item_json = item.asdict() + item_json["folder"] = category.asdict() + set_computed_fields_json(config, item_json) - json_path = config.json_root / f"{ sha256(item_json["id"].encode("utf-8")).hexdigest() }.json" - grabbed_item_paths.append(json_path) + json_path = config.json_root / f"{ sha256(str(item_json["id"]).encode("utf-8")).hexdigest() }.json" + grabbed_item_paths.append(json_path) - write_files, updating = False, False - if not json_path.exists(): - write_files = True - new_items += 1 - else: - old_item_json = json.load(json_path.open("r")) - if item_json["updated"] > old_item_json["updated"]: - write_files, updating = True, True - updated_items += 1 + write_files, updating = False, False + if not json_path.exists(): + write_files = True + new_items += 1 + else: + old_item_json = json.load(json_path.open("r")) + if item_json["updated"] > old_item_json["updated"]: + write_files, updating = True, True + updated_items += 1 - if write_files: - # write JSON - with json_path.open("w") as f: - json.dump(item_json, f) - # write HTML - generate_html_for_item(config, item_json, regenerate=updating) - - - continuation = None - while continuation != '': - items = client_session.get_stream_items_ids(stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=config.items_per_query, continuation=continuation) - item_ids = [item.id for item in items.item_refs] - process(item_ids) - continuation = items.continuation + if write_files: + # write JSON + with json_path.open("w") as f: + json.dump(item_json, f) + # write HTML + generate_html_for_item(config, item_json, regenerate=updating) # Remove items that we didn't get from the server but are in the JSON directory removed_items = 0 @@ -328,16 +484,16 @@ def main(): config = Config() if args.action == "sync": - client_session = ClientSession(config) + client_session = make_client_session(config) synchronize(config, client_session) elif args.action == "sync-up": - client_session = ClientSession(config) + client_session = make_client_session(config) synchronize_local_changes(config, client_session) elif args.action == "sync-down": - client_session = ClientSession(config) + client_session = make_client_session(config) synchronize_remote_changes(config, client_session) elif args.action == "daemon": - client_session = ClientSession(config) + client_session = make_client_session(config) try: asyncio.run(daemon(config, client_session)) except KeyboardInterrupt: diff --git a/pyproject.toml b/pyproject.toml index 1cd4f4f..6275813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ requires-python = ">=3.12" dependencies = [ "jinja2>=3.1.6", "requests>=2.32.5", + "ttrss-python>=0.5", ] license = "ISC" license-files = [ "LICENSE" ] diff --git a/uv.lock b/uv.lock index 9566f16..952e5f3 100644 --- a/uv.lock +++ b/uv.lock @@ -60,12 +60,14 @@ source = { virtual = "." } dependencies = [ { name = "jinja2" }, { name = "requests" }, + { name = "ttrss-python" }, ] [package.metadata] requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "requests", specifier = ">=2.32.5" }, + { name = "ttrss-python", specifier = ">=0.5" }, ] [[package]] @@ -167,6 +169,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "ttrss-python" +version = "0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a2/72/786e2edf469d6d1e048f3dd043a50ececf7674d10402d703d1297bb6e102/ttrss-python-0.5.tar.gz", hash = "sha256:ad7816b85e3c0b13822f321f91ed7f19dc3b82237f2d7838c2dcb9aac0f4ca07", size = 6247, upload-time = "2015-09-02T08:53:06.221Z" } + [[package]] name = "urllib3" version = "2.5.0"