
feat: add support for tt-rss api

Étienne Fildadut 2025-10-10 16:47:20 +02:00
parent 0562a245d6
commit b100d8f0b8
5 changed files with 256 additions and 70 deletions


@@ -70,4 +70,5 @@ You need Python 3.12 or newer. Then pip it up.
- [ ] Actually think about the issues created by the duplicate warning
- [x] Set generated files creation/modification date instead of putting date in filename
- [ ] Make a proper Python package
- [ ] Attachments


@@ -1,4 +1,5 @@
[server]
api = "googlereader"
# (Required) URL of your server's Google Reader API endpoint
# Can be set through the environment variable SERVER_URL.
url = "https://rss.example.com/"
@@ -37,19 +38,35 @@ template = '''
<article style="max-width:60rem; margin:auto; text-align:justify;">
<p style="display:flex; flex-direction:row; justify-content:space-between;">
<span>{{ published_formatted }}</span>
<span><a href="{{ origin_url }}">{{ origin_title }}</a></span>
<span>
{% if feed_url %}
<a href="{{ feed_url }}">
{% endif %}
{% if feed_icon_url %}
<img style="height:1em;" src="{{ feed_icon_url }}">
{% endif %}
{{ feed_title }}
{% if feed_url %}
</a>
{% endif %}
</span>
</p>
<h1><a href="{{ canonical_url }}">{{ title }}</a></h1>
<h1><a href="{{ article_url }}">{{ title }}</a></h1>
<h3>{{ author }}</h3>
<div>{{ summary | safe }}</div>
{% if content %}
<div>{{ content | safe }}</div>
{% else %}
<img src="{{ image_url }}">
<div>{{ summary | safe }}</div>
{% endif %}
</article>
</body>
</html>
'''
# Filename template for generated HTML files.
# Can be set through the environment variable HTML_FILENAME_TEMPLATE.
filename_template = "[{{ origin_title }}]\t{{ title }} ({{ published_formatted }}).html"
filename_template = "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html"
category_template = "{{ title }}"
# Maximum allowed filename length (in bytes, assuming UTF-8 encoding) before truncating. Depending on your filesystem's filename length limit it may be possible to increase this value; ask Wikipedia for details.
# Can be set through the environment variable HTML_MAX_FILENAME_LENGTH.
max_filename_length = 250
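As an aside (not part of the commit), here is a minimal Python sketch of how the filename_template above might render. Field names follow the new Article.asdict() keys, published_formatted is the computed field added by set_computed_fields_json, and all values are hypothetical:

from jinja2 import Template

# Same template string as in the configuration above, rendered with made-up values.
filename_template = Template(
    "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html",
    autoescape=False,
)
print(filename_template.render(
    feed_title="Example Feed",
    title="Hello world",
    published_formatted="2025-10-10 16:47",
))
# -> "[Example Feed]\tHello world (2025-10-10 16:47).html" (with a literal tab)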


@@ -1,4 +1,5 @@
#!/usr/bin/python3
from __future__ import annotations
import os
import re
@@ -8,11 +9,13 @@ import sys
import argparse
import asyncio
import signal
from abc import ABC, abstractmethod
from datetime import datetime
from zoneinfo import ZoneInfo
from pathlib import Path
from hashlib import sha256
from jinja2 import Template
from ttrss.client import TTRClient
import google_reader
@@ -49,16 +52,22 @@ class Config:
# Get config fields
self.html_root: Path = Path(get_config("directories", "reader"))
self.json_root: Path = Path(get_config("directories", "data"))
self.server_api: str = str(get_config("server", "api"))
self.server_url: str = str(get_config("server", "url", False))
self.server_user: str = str(get_config("server", "user", False))
self.server_password: str = str(get_config("server", "password", False))
self.items_per_query: int = int(get_config("server", "items_per_request"))
self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
self.time_format: str = str(get_config("datetime", "format"))
self.item_template: Template = Template(str(get_config("html", "template")), autoescape=True)
self.item_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False)
self.item_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False)
self.max_filename_length: int = int(get_config("html", "max_filename_length"))
self.filename_translation = str.maketrans(get_config("html", "filename_replacement"))
self.daemon_sync_up_every: int = int(get_config("daemon", "sync_up_every"))
self.daemon_sync_down_every: int = int(get_config("daemon", "sync_down_every"))
@@ -71,34 +80,192 @@ class Config:
#%% Interaction with server
type Id = int | str
class Article(ABC):
id: Id
title: str = ""
published: int = 0
updated: int = 0
author: str = ""
summary: str = ""
content: str = ""
feed_title: str = ""
feed_url: str = ""
feed_icon_url: str = ""
feed_order: int = 0
article_url: str = ""
comments_url: str = ""
language: str = ""
image_url: str = ""
def asdict(self):
return {
"id": self.id,
"title": self.title,
"published": self.published,
"updated": self.updated,
"author": self.author,
"summary": self.summary,
"content": self.content,
"feed_title": self.feed_title,
"feed_url": self.feed_url,
"feed_icon_url": self.feed_icon_url,
"feed_order": self.feed_order,
"article_url": self.article_url,
"comments_url": self.comments_url,
"language": self.language,
"image_url": self.image_url,
}
class GReaderArticle(Article):
def __init__(self, session: GReaderSession, item_content):
self.id = item_content.id
self.title = item_content.title
self.published = item_content.published
self.updated = item_content.updated
self.author = item_content.author
self.summary = item_content.summary.content
self.content = item_content.content.content
self.feed_title = item_content.origin.title
self.feed_url = item_content.origin.html_url
self.article_url = item_content.canonical[0].href
class TTRArticle(Article):
def __init__(self, session: TRRSession, article):
self.id = article.id
self.title = article.title
self.published = article.updated.timestamp()
self.updated = article.updated.timestamp()
self.author = article.author
self.summary = article.excerpt
self.content = article.content
self.feed_title = article.feed_title
self.feed_url = article.site_url
self.feed_icon_url = session.feeds[article.feed_id]["icon"]
self.feed_order = session.feeds[article.feed_id]["order"]
self.article_url = article.link
self.comments_url = article.comments_link
self.language = article.lang
self.image_url = article.flavor_image
class Category:
id: Id
title: str
parents: list[Category]
order: int = 0
def __init__(self, id, title, parents=[], order=0):
self.id = id
self.title = title
self.parents = parents
self.order = order
def asdict(self):
return {
"id": self.id,
"title": self.title,
"parents": [ dir.asdict() for dir in self.parents ],
"order": self.order
}
class ClientSession(ABC):
@abstractmethod
def mark_as_read(self, item_ids: list[Id]):
"""
Mark all the given articles as read.
"""
pass
@abstractmethod
def list_folders(self) -> list[Category]:
"""
Returns a list of all the categories on the server.
"""
pass
@abstractmethod
def get_unread_articles_in_folder(self, folder_id: Id, limit: int, continuation: int=0) -> list[Article]:
"""
Returns a list of Articles in the given category. limit and continuation are required for pagination.
"""
pass
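# Illustration only, not part of this commit: the pagination contract of
# get_unread_articles_in_folder, mirroring the loop used later in
# synchronize_with_server; iter_unread is a hypothetical helper.
def iter_unread(session: ClientSession, folder: Category, batch: int):
    continuation = 0
    while True:
        page = session.get_unread_articles_in_folder(folder.id, limit=batch, continuation=continuation)
        yield from page
        if len(page) < batch:  # a short (or empty) page means the folder is exhausted
            break
        continuation += len(page)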
label_name = re.compile("user/.*/label/(.*)")
class ClientSession:
client: google_reader.Client
class GReaderSession(ClientSession):
greader: google_reader.Client
auth_token: str
csrf_token: str
def __init__(self, config: Config):
self.client = google_reader.Client(config.server_url)
self.auth_token = self.client.login(config.server_user, config.server_password)
self.csrf_token = self.client.get_token(self.auth_token)
self.greader = google_reader.Client(config.server_url)
self.auth_token = self.greader.login(config.server_user, config.server_password)
self.csrf_token = self.greader.get_token(self.auth_token)
def mark_as_read(self, item_ids):
self.client.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ])
def mark_as_read(self, item_ids: list[Id]):
self.greader.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ])
def list_folders(self):
folders = [tag for tag in self.client.list_tags(self.auth_token) if tag.type == "folder"]
folders = [tag for tag in self.greader.list_tags(self.auth_token) if tag.type == "folder"]
l = []
for folder in folders:
folder_name = folder.label or label_name.search(folder.id).group(1)
folder_id = folder.id
l.append((folder_name, folder_id))
l.append(Category(id=folder_id, title=folder_name))
return l
def get_stream_items_ids(self, *args, **kwargs):
return self.client.get_stream_items_ids(self.auth_token, *args, **kwargs)
def get_unread_articles_in_folder(self, folder_id, limit=500, continuation=0):
items_ids = self.greader.get_stream_items_ids(self.auth_token, stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=limit, continuation=continuation)
item_contents = self.greader.get_stream_items_contents(self.auth_token, self.csrf_token, item_ids=[item.id for item in items_ids.item_refs])
return [ GReaderArticle(self, item_content) for item_content in item_contents.items ]
def get_stream_items_contents(self, *args, **kwargs):
return self.client.get_stream_items_contents(self.auth_token, self.csrf_token, *args, **kwargs)
class TRRSession(ClientSession):
ttrss: TTRClient
feeds: dict
def __init__(self, config: Config):
self.ttrss = TTRClient(config.server_url, config.server_user, config.server_password, auto_login=True)
self.ttrss.login()
self.feeds = {}
def mark_as_read(self, item_ids):
self.ttrss.mark_read(item_ids)
def list_folders(self):
self.feeds = {}
def get_categories_recursive(parent_category, parent_categories=[]):
categories = []
for i in range(len(parent_category["items"])):
item = parent_category["items"][i]
# skip special categories and feeds
if item["bare_id"] <= 0:
continue
# category
elif item.get("type") == "category":
category = Category(id=item["bare_id"], parents=parent_categories, title=item["name"], order=i)
categories.append(category)
categories += get_categories_recursive(item, parent_categories+[category])
# feeds
elif "type" not in item:
self.feeds[item["bare_id"]] = item
self.feeds[item["bare_id"]]["order"] = i
return categories
tree = self.ttrss.get_feed_tree()
return get_categories_recursive(tree["categories"])
def get_unread_articles_in_folder(self, folder_id, limit=100, continuation=0):
headlines = self.ttrss.get_headlines(feed_id=folder_id, limit=limit, skip=continuation, is_cat=True, show_excerpt=True, show_content=True, view_mode="unread", include_attachments=True, include_nested=False)
return [ TTRArticle(self, headline) for headline in headlines ]
def make_client_session(config: Config):
api = config.server_api
if api == "googlereader":
return GReaderSession(config)
elif api == "ttrss":
return TRRSession(config)
else:
print(f"Configuration error: server.api must be either ttrss or googlereader", file=sys.stderr)
exit(1)
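# Illustration only, not part of this commit: the backend is selected from the
# [server] table of the configuration file, e.g. (hypothetical values):
#   [server]
#   api = "ttrss"
#   url = "https://ttrss.example.com/"
#   user = "alice"
#   password = "hunter2"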
#%% Regular feather operations
@@ -139,8 +306,11 @@ def truncate_filename(config, filename):
return filename[:cutoff] + '…' + suffix
def get_html_path(config, item_json):
folder_directory = config.html_root / escape_filename(config, item_json["folder"])
folder_directory.mkdir(exist_ok=True)
folder_directory = config.html_root
for folder in item_json["folder"]["parents"]:
folder_directory /= escape_filename(config, config.item_category_template.render(folder))
folder_directory /= escape_filename(config, config.item_category_template.render(item_json["folder"]))
folder_directory.mkdir(parents=True, exist_ok=True) # TODO move
html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(item_json)))
@@ -163,30 +333,24 @@ def synchronize_with_server(config, client_session):
grabbed_item_paths = []
folders = client_session.list_folders()
for (folder_name, folder_id) in folders:
print(f" Updating folder {folder_name}")
for category in folders:
folder_path, folder_id = category.title, category.id
print(f" Updating folder {folder_path}") # TODO fixme
def process(item_ids):
nonlocal new_items, updated_items, grabbed_item_paths
if len(item_ids) > 0:
item_contents = client_session.get_stream_items_contents(item_ids=item_ids)
for item_content in item_contents.items:
item_json = {
"id": item_content.id,
"folder": folder_name,
"title": item_content.title,
"published": item_content.published,
"updated": item_content.updated,
"author": item_content.author,
"summary": item_content.summary.content,
"content": item_content.content.content,
"origin_title": item_content.origin.title,
"origin_url": item_content.origin.html_url,
"canonical_url": item_content.canonical[0].href,
}
remaining, continuation = True, 0
while remaining:
articles = client_session.get_unread_articles_in_folder(folder_id, limit=config.items_per_query, continuation=continuation)
if len(articles) >= config.items_per_query:
continuation += len(articles)
else:
remaining = False
for item in articles:
item_json = item.asdict()
item_json["folder"] = category.asdict()
set_computed_fields_json(config, item_json)
json_path = config.json_root / f"{ sha256(item_json["id"].encode("utf-8")).hexdigest() }.json"
json_path = config.json_root / f"{ sha256(str(item_json["id"]).encode("utf-8")).hexdigest() }.json"
grabbed_item_paths.append(json_path)
write_files, updating = False, False
@@ -206,14 +370,6 @@ def synchronize_with_server(config, client_session):
# write HTML
generate_html_for_item(config, item_json, regenerate=updating)
continuation = None
while continuation != '':
items = client_session.get_stream_items_ids(stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=config.items_per_query, continuation=continuation)
item_ids = [item.id for item in items.item_refs]
process(item_ids)
continuation = items.continuation
# Remove items that we didn't get from the server but are in the JSON directory
removed_items = 0
for item_path in config.json_root.glob("*.json"):
@@ -328,16 +484,16 @@ def main():
config = Config()
if args.action == "sync":
client_session = ClientSession(config)
client_session = make_client_session(config)
synchronize(config, client_session)
elif args.action == "sync-up":
client_session = ClientSession(config)
client_session = make_client_session(config)
synchronize_local_changes(config, client_session)
elif args.action == "sync-down":
client_session = ClientSession(config)
client_session = make_client_session(config)
synchronize_remote_changes(config, client_session)
elif args.action == "daemon":
client_session = ClientSession(config)
client_session = make_client_session(config)
try:
asyncio.run(daemon(config, client_session))
except KeyboardInterrupt:


@@ -8,6 +8,7 @@ requires-python = ">=3.12"
dependencies = [
"jinja2>=3.1.6",
"requests>=2.32.5",
"ttrss-python>=0.5",
]
license = "ISC"
license-files = [ "LICENSE" ]

uv.lock (generated)

@@ -60,12 +60,14 @@ source = { virtual = "." }
dependencies = [
{ name = "jinja2" },
{ name = "requests" },
{ name = "ttrss-python" },
]
[package.metadata]
requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "ttrss-python", specifier = ">=0.5" },
]
[[package]]
@@ -167,6 +169,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "ttrss-python"
version = "0.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a2/72/786e2edf469d6d1e048f3dd043a50ececf7674d10402d703d1297bb6e102/ttrss-python-0.5.tar.gz", hash = "sha256:ad7816b85e3c0b13822f321f91ed7f19dc3b82237f2d7838c2dcb9aac0f4ca07", size = 6247, upload-time = "2015-09-02T08:53:06.221Z" }
[[package]]
name = "urllib3"
version = "2.5.0"