1
0
Fork 0
mirror of https://codeberg.org/Reuh/feather.git synced 2025-10-27 18:19:32 +00:00

feat: add support for tt-rss api

This commit is contained in:
Étienne Fildadut 2025-10-10 16:47:20 +02:00
parent 0562a245d6
commit b100d8f0b8
5 changed files with 256 additions and 70 deletions

View file

@ -70,4 +70,5 @@ You need Python 3.12 or newer. Then pip it up.
- [ ] Actually think about the issues created by the duplicate warning - [ ] Actually think about the issues created by the duplicate warning
- [x] Set generated files creation/modification date instead of putting date in filename - [x] Set generated files creation/modification date instead of putting date in filename
- [ ] Make a proper Python package - [ ] Make a proper Python package
- [ ] Attachments

View file

@ -1,4 +1,5 @@
[server] [server]
api = "googlereader"
# (Required) URL of your server's Google Reader API endpoint # (Required) URL of your server's Google Reader API endpoint
# Can be set through the environment variable SERVER_URL. # Can be set through the environment variable SERVER_URL.
url = "https://rss.example.com/" url = "https://rss.example.com/"
@ -37,19 +38,35 @@ template = '''
<article style="max-width:60rem; margin:auto; text-align:justify;"> <article style="max-width:60rem; margin:auto; text-align:justify;">
<p style="display:flex; flex-direction:row; justify-content:space-between;"> <p style="display:flex; flex-direction:row; justify-content:space-between;">
<span>{{ published_formatted }}</span> <span>{{ published_formatted }}</span>
<span><a href="{{ origin_url }}">{{ origin_title }}</a></span> <span>
{% if feed_url %}
<a href="{{ feed_url }}">
{% endif %}
{% if feed_icon_url %}
<img style="height:1em;" src="{{ feed_icon_url }}">
{% endif %}
{{ feed_title }}
{% if feed_url %}
</a>
{% endif %}
</span>
</p> </p>
<h1><a href="{{ canonical_url }}">{{ title }}</a></h1> <h1><a href="{{ article_url }}">{{ title }}</a></h1>
<h3>{{ author }}</h3> <h3>{{ author }}</h3>
<div>{{ summary | safe }}</div> {% if content %}
<div>{{ content | safe }}</div> <div>{{ content | safe }}</div>
{% else %}
<img src="{{ image_url }}">
<div>{{ summary | safe }}</div>
{% endif %}
</article> </article>
</body> </body>
</html> </html>
''' '''
# Filename template for generated HTML files. # Filename template for generated HTML files.
# Can be set through the environment variable HTML_FILENAME_TEMPLATE. # Can be set through the environment variable HTML_FILENAME_TEMPLATE.
filename_template = "[{{ origin_title }}]\t{{ title }} ({{ published_formatted }}).html" filename_template = "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html"
category_template = "{{ title }}"
# Maximum allowed filename length (in bytes assuming UTF-8 encoding) before truncating. Depending on your filesystem filename's limits it may be possible to increase the value, ask Wikipedia for details. # Maximum allowed filename length (in bytes assuming UTF-8 encoding) before truncating. Depending on your filesystem filename's limits it may be possible to increase the value, ask Wikipedia for details.
# Can be set through the environment variable HTML_MAX_FILENAME_LENGTH. # Can be set through the environment variable HTML_MAX_FILENAME_LENGTH.
max_filename_length = 250 max_filename_length = 250

View file

@ -1,4 +1,5 @@
#!/usr/bin/python3 #!/usr/bin/python3
from __future__ import annotations
import os import os
import re import re
@ -8,11 +9,13 @@ import sys
import argparse import argparse
import asyncio import asyncio
import signal import signal
from abc import ABC, abstractmethod
from datetime import datetime from datetime import datetime
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
from pathlib import Path from pathlib import Path
from hashlib import sha256 from hashlib import sha256
from jinja2 import Template from jinja2 import Template
from ttrss.client import TTRClient
import google_reader import google_reader
@ -49,16 +52,22 @@ class Config:
# Get config fields # Get config fields
self.html_root: Path = Path(get_config("directories", "reader")) self.html_root: Path = Path(get_config("directories", "reader"))
self.json_root: Path = Path(get_config("directories", "data")) self.json_root: Path = Path(get_config("directories", "data"))
self.server_api: str = str(get_config("server", "api"))
self.server_url: str = str(get_config("server", "url", False)) self.server_url: str = str(get_config("server", "url", False))
self.server_user: str = str(get_config("server", "user", False)) self.server_user: str = str(get_config("server", "user", False))
self.server_password: str = str(get_config("server", "password", False)) self.server_password: str = str(get_config("server", "password", False))
self.items_per_query: int = int(get_config("server", "items_per_request")) self.items_per_query: int = int(get_config("server", "items_per_request"))
self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
self.time_format: str = str(get_config("datetime", "format")) self.time_format: str = str(get_config("datetime", "format"))
self.item_template: Template = Template(str(get_config("html", "template")), autoescape=True) self.item_template: Template = Template(str(get_config("html", "template")), autoescape=True)
self.item_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False) self.item_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False)
self.item_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False)
self.max_filename_length: int = int(get_config("html", "max_filename_length")) self.max_filename_length: int = int(get_config("html", "max_filename_length"))
self.filename_translation = str.maketrans(get_config("html", "filename_replacement")) self.filename_translation = str.maketrans(get_config("html", "filename_replacement"))
self.daemon_sync_up_every: int = int(get_config("daemon", "sync_up_every")) self.daemon_sync_up_every: int = int(get_config("daemon", "sync_up_every"))
self.daemon_sync_down_every: int = int(get_config("daemon", "sync_down_every")) self.daemon_sync_down_every: int = int(get_config("daemon", "sync_down_every"))
@ -71,34 +80,192 @@ class Config:
#%% Interaction with server #%% Interaction with server
type Id = int | str
class Article(ABC):
    """Backend-agnostic representation of a single feed article.

    Concrete subclasses translate a backend-specific payload into this
    shared attribute set; anything a backend does not provide keeps the
    class-level default below.
    """
    id: Id
    title: str = ""
    published: int = 0
    updated: int = 0
    author: str = ""
    summary: str = ""
    content: str = ""
    feed_title: str = ""
    feed_url: str = ""
    feed_icon_url: str = ""
    feed_order: int = 0
    article_url: str = ""
    comments_url: str = ""
    language: str = ""
    image_url: str = ""
    def asdict(self):
        """Return every article field as a plain dict (for JSON dumps / template rendering)."""
        fields = (
            "id", "title", "published", "updated", "author", "summary",
            "content", "feed_title", "feed_url", "feed_icon_url",
            "feed_order", "article_url", "comments_url", "language",
            "image_url",
        )
        return {name: getattr(self, name) for name in fields}
class GReaderArticle(Article):
    """Article built from a Google Reader API item-content payload."""
    def __init__(self, session: GReaderSession, item_content):
        # `session` is unused here (kept for signature parity with TTRArticle):
        # the Google Reader payload is self-contained.
        # Fields that map 1:1 onto identically-named payload attributes.
        for attr in ("id", "title", "published", "updated", "author"):
            setattr(self, attr, getattr(item_content, attr))
        # Summary/content are wrapper objects whose .content holds the HTML.
        self.summary = item_content.summary.content
        self.content = item_content.content.content
        # "origin" is the feed the item came from.
        self.feed_title = item_content.origin.title
        self.feed_url = item_content.origin.html_url
        # The first canonical link is taken as the article's own URL.
        self.article_url = item_content.canonical[0].href
class TTRArticle(Article):
    """Article built from a Tiny Tiny RSS headline object."""
    def __init__(self, session: TRRSession, article):
        self.id = article.id
        self.title = article.title
        # tt-rss only exposes an "updated" datetime, so it serves as both
        # timestamps here (NOTE(review): .timestamp() yields a float while
        # Article declares int — confirm downstream comparisons are fine).
        self.published = article.updated.timestamp()
        self.updated = article.updated.timestamp()
        self.author = article.author
        self.summary = article.excerpt
        self.content = article.content
        self.feed_title = article.feed_title
        self.feed_url = article.site_url
        # Icon/order come from the session's feed-tree cache, so
        # list_folders() must have populated session.feeds beforehand.
        feed_info = session.feeds[article.feed_id]
        self.feed_icon_url = feed_info["icon"]
        self.feed_order = feed_info["order"]
        self.article_url = article.link
        self.comments_url = article.comments_link
        self.language = article.lang
        self.image_url = article.flavor_image
class Category:
    """A category (folder) of feeds on the server, with its ancestor chain.

    Attributes:
        id: Backend-specific category identifier.
        title: Human-readable category name.
        parents: Ancestor categories, outermost first.
        order: Position of the category among its siblings.
    """
    id: Id
    title: str
    parents: list[Category]
    order: int = 0
    def __init__(self, id, title, parents=None, order=0):
        self.id = id
        self.title = title
        # A None default avoids the shared-mutable-default pitfall of the
        # previous `parents=[]` signature; passing a list works as before.
        self.parents = [] if parents is None else parents
        self.order = order
    def asdict(self):
        """Return the category — and, recursively, its parents — as plain dicts."""
        return {
            "id": self.id,
            "title": self.title,
            # renamed loop variable: `dir` shadowed the builtin
            "parents": [parent.asdict() for parent in self.parents],
            "order": self.order
        }
class ClientSession(ABC):
    """Interface every server backend (Google Reader, tt-rss, ...) implements."""
    @abstractmethod
    def mark_as_read(self, item_ids: list[Id]):
        """Mark every article in `item_ids` as read on the server."""
    @abstractmethod
    def list_folders(self) -> list[Category]:
        """Return every category available on the server."""
    @abstractmethod
    def get_unread_articles_in_folder(self, folder_id: Id, limit: int, continuation: int=0) -> list[Article]:
        """Return Articles from the category `folder_id`.

        `limit` and `continuation` implement pagination: at most `limit`
        articles are returned, starting `continuation` items in.
        """
label_name = re.compile("user/.*/label/(.*)") label_name = re.compile("user/.*/label/(.*)")
class GReaderSession(ClientSession):
    """ClientSession backed by a Google Reader-compatible API."""
    greader: google_reader.Client
    auth_token: str
    csrf_token: str
    def __init__(self, config: Config):
        self.greader = google_reader.Client(config.server_url)
        self.auth_token = self.greader.login(config.server_user, config.server_password)
        self.csrf_token = self.greader.get_token(self.auth_token)
    def mark_as_read(self, item_ids: list[Id]):
        """Mark the given articles as read on the server."""
        self.greader.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ])
    def list_folders(self):
        """Return every "folder" tag on the server as a flat list of Categories."""
        folders = [tag for tag in self.greader.list_tags(self.auth_token) if tag.type == "folder"]
        l = []
        for folder in folders:
            # Some servers omit the label; fall back to extracting it from
            # the tag id ("user/<uid>/label/<name>").
            folder_name = folder.label or label_name.search(folder.id).group(1)
            folder_id = folder.id
            l.append(Category(id=folder_id, title=folder_name))
        return l
    def get_unread_articles_in_folder(self, folder_id, limit=500, continuation=0):
        """Return up to `limit` unread articles from `folder_id`, skipping the first `continuation`."""
        items_ids = self.greader.get_stream_items_ids(self.auth_token, stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=limit, continuation=continuation)
        # BUG FIX: the id list was read from the undefined name `items`
        # instead of `items_ids`, which raised NameError at runtime.
        item_contents = self.greader.get_stream_items_contents(self.auth_token, self.csrf_token, item_ids=[item.id for item in items_ids.item_refs])
        return [ GReaderArticle(self, item_content) for item_content in item_contents.items ]
class TRRSession(ClientSession):
    """ClientSession backed by the Tiny Tiny RSS JSON API."""
    ttrss: TTRClient
    # feed-id -> feed-tree item cache, filled by list_folders();
    # TTRArticle reads icon/order information from it.
    feeds: dict
    def __init__(self, config: Config):
        self.ttrss = TTRClient(config.server_url, config.server_user, config.server_password, auto_login=True)
        # NOTE(review): auto_login=True should already authenticate; this
        # explicit login() looks redundant — confirm before removing.
        self.ttrss.login()
        self.feeds = {}
    def mark_as_read(self, item_ids):
        """Mark the given articles as read on the server."""
        self.ttrss.mark_read(item_ids)
    def list_folders(self):
        """Walk the tt-rss feed tree, caching feeds and returning all categories."""
        self.feeds = {}
        def get_categories_recursive(parent_category, parent_categories=None):
            # None default instead of the mutable-default `=[]` smell.
            parent_categories = [] if parent_categories is None else parent_categories
            categories = []
            # enumerate() replaces the previous `for i in range(len(...))` loop;
            # the index doubles as the sibling ordering.
            for order, item in enumerate(parent_category["items"]):
                # skip special entries (e.g. "All feeds"), which use bare_id <= 0
                if item["bare_id"] <= 0:
                    continue
                # category: record it and recurse into its children
                elif item.get("type") == "category":
                    category = Category(id=item["bare_id"], parents=parent_categories, title=item["name"], order=order)
                    categories.append(category)
                    categories += get_categories_recursive(item, parent_categories+[category])
                # plain feed: remember it (plus its position) for TTRArticle
                elif "type" not in item:
                    self.feeds[item["bare_id"]] = item
                    self.feeds[item["bare_id"]]["order"] = order
            return categories
        tree = self.ttrss.get_feed_tree()
        return get_categories_recursive(tree["categories"])
    def get_unread_articles_in_folder(self, folder_id, limit=100, continuation=0):
        """Return up to `limit` unread articles from category `folder_id`, skipping the first `continuation`."""
        headlines = self.ttrss.get_headlines(feed_id=folder_id, limit=limit, skip=continuation, is_cat=True, show_excerpt=True, show_content=True, view_mode="unread", include_attachments=True, include_nested=False)
        return [ TTRArticle(self, headline) for headline in headlines ]
def make_client_session(config: Config):
    """Instantiate the ClientSession implementation selected by config.server_api.

    Supported values: "googlereader" and "ttrss". On any other value, prints
    a configuration error to stderr and exits with status 1.
    """
    api = config.server_api
    if api == "googlereader":
        return GReaderSession(config)
    elif api == "ttrss":
        return TRRSession(config)
    else:
        # Dropped the pointless f-prefix (no placeholders) and switched from
        # the site-module builtin exit() to the reliable sys.exit().
        print("Configuration error: server.api must be either ttrss or googlereader", file=sys.stderr)
        sys.exit(1)
#%% Regular feather operations #%% Regular feather operations
@ -139,8 +306,11 @@ def truncate_filename(config, filename):
return filename[:cutoff] + '' + suffix return filename[:cutoff] + '' + suffix
def get_html_path(config, item_json): def get_html_path(config, item_json):
folder_directory = config.html_root / escape_filename(config, item_json["folder"]) folder_directory = config.html_root
folder_directory.mkdir(exist_ok=True) for folder in item_json["folder"]["parents"]:
folder_directory /= escape_filename(config, config.item_category_template.render(folder))
folder_directory /= escape_filename(config, config.item_category_template.render(item_json["folder"]))
folder_directory.mkdir(parents=True, exist_ok=True) # TODO move
html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(item_json))) html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(item_json)))
@ -163,56 +333,42 @@ def synchronize_with_server(config, client_session):
grabbed_item_paths = [] grabbed_item_paths = []
folders = client_session.list_folders() folders = client_session.list_folders()
for (folder_name, folder_id) in folders: for category in folders:
print(f" Updating folder {folder_name}") folder_path, folder_id = category.title, category.id
print(f" Updating folder {folder_path}") # TODO fixme
def process(item_ids): remaining, continuation = True, 0
nonlocal new_items, updated_items, grabbed_item_paths while remaining:
if len(item_ids) > 0: articles = client_session.get_unread_articles_in_folder(folder_id, limit=config.items_per_query, continuation=continuation)
item_contents = client_session.get_stream_items_contents(item_ids=item_ids) if len(articles) >= config.items_per_query:
for item_content in item_contents.items: continuation += len(articles)
item_json = { else:
"id": item_content.id, remaining = False
"folder": folder_name,
"title": item_content.title, for item in articles:
"published": item_content.published, item_json = item.asdict()
"updated": item_content.updated, item_json["folder"] = category.asdict()
"author": item_content.author, set_computed_fields_json(config, item_json)
"summary": item_content.summary.content,
"content": item_content.content.content,
"origin_title": item_content.origin.title,
"origin_url": item_content.origin.html_url,
"canonical_url": item_content.canonical[0].href,
}
set_computed_fields_json(config, item_json)
json_path = config.json_root / f"{ sha256(item_json["id"].encode("utf-8")).hexdigest() }.json" json_path = config.json_root / f"{ sha256(str(item_json["id"]).encode("utf-8")).hexdigest() }.json"
grabbed_item_paths.append(json_path) grabbed_item_paths.append(json_path)
write_files, updating = False, False write_files, updating = False, False
if not json_path.exists(): if not json_path.exists():
write_files = True write_files = True
new_items += 1 new_items += 1
else: else:
old_item_json = json.load(json_path.open("r")) old_item_json = json.load(json_path.open("r"))
if item_json["updated"] > old_item_json["updated"]: if item_json["updated"] > old_item_json["updated"]:
write_files, updating = True, True write_files, updating = True, True
updated_items += 1 updated_items += 1
if write_files: if write_files:
# write JSON # write JSON
with json_path.open("w") as f: with json_path.open("w") as f:
json.dump(item_json, f) json.dump(item_json, f)
# write HTML # write HTML
generate_html_for_item(config, item_json, regenerate=updating) generate_html_for_item(config, item_json, regenerate=updating)
continuation = None
while continuation != '':
items = client_session.get_stream_items_ids(stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=config.items_per_query, continuation=continuation)
item_ids = [item.id for item in items.item_refs]
process(item_ids)
continuation = items.continuation
# Remove items that we didn't get from the server but are in the JSON directory # Remove items that we didn't get from the server but are in the JSON directory
removed_items = 0 removed_items = 0
@ -328,16 +484,16 @@ def main():
config = Config() config = Config()
if args.action == "sync": if args.action == "sync":
client_session = ClientSession(config) client_session = make_client_session(config)
synchronize(config, client_session) synchronize(config, client_session)
elif args.action == "sync-up": elif args.action == "sync-up":
client_session = ClientSession(config) client_session = make_client_session(config)
synchronize_local_changes(config, client_session) synchronize_local_changes(config, client_session)
elif args.action == "sync-down": elif args.action == "sync-down":
client_session = ClientSession(config) client_session = make_client_session(config)
synchronize_remote_changes(config, client_session) synchronize_remote_changes(config, client_session)
elif args.action == "daemon": elif args.action == "daemon":
client_session = ClientSession(config) client_session = make_client_session(config)
try: try:
asyncio.run(daemon(config, client_session)) asyncio.run(daemon(config, client_session))
except KeyboardInterrupt: except KeyboardInterrupt:

View file

@ -8,6 +8,7 @@ requires-python = ">=3.12"
dependencies = [ dependencies = [
"jinja2>=3.1.6", "jinja2>=3.1.6",
"requests>=2.32.5", "requests>=2.32.5",
"ttrss-python>=0.5",
] ]
license = "ISC" license = "ISC"
license-files = [ "LICENSE" ] license-files = [ "LICENSE" ]

11
uv.lock generated
View file

@ -60,12 +60,14 @@ source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "jinja2" }, { name = "jinja2" },
{ name = "requests" }, { name = "requests" },
{ name = "ttrss-python" },
] ]
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", specifier = ">=3.1.6" },
{ name = "requests", specifier = ">=2.32.5" }, { name = "requests", specifier = ">=2.32.5" },
{ name = "ttrss-python", specifier = ">=0.5" },
] ]
[[package]] [[package]]
@ -167,6 +169,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
] ]
[[package]]
name = "ttrss-python"
version = "0.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a2/72/786e2edf469d6d1e048f3dd043a50ececf7674d10402d703d1297bb6e102/ttrss-python-0.5.tar.gz", hash = "sha256:ad7816b85e3c0b13822f321f91ed7f19dc3b82237f2d7838c2dcb9aac0f4ca07", size = 6247, upload-time = "2015-09-02T08:53:06.221Z" }
[[package]] [[package]]
name = "urllib3" name = "urllib3"
version = "2.5.0" version = "2.5.0"