1
0
Fork 0
mirror of https://codeberg.org/Reuh/feather.git synced 2025-10-28 02:29:32 +00:00

feat: allow retrieving read articles

This commit is contained in:
Étienne Fildadut 2025-10-11 14:35:14 +02:00
parent b0e0c5d0df
commit 07e9d208b1
6 changed files with 140 additions and 81 deletions

View file

@ -62,6 +62,6 @@ You need Python 3.12 or newer. Then pip it up, as the kids say.
- [ ] Use inotify for real-time article mark-as-read action - [ ] Use inotify for real-time article mark-as-read action
- [ ] Share the fun somewhere - [ ] Share the fun somewhere
- [ ] Actually think about the issues created by the duplicate warning - [ ] Actually think about the issues created by the duplicate warning
- [ ] Attachments - [ ] Get article attachments
- [ ] Test with FreshRSS - [ ] Test with FreshRSS

View file

@ -1,12 +1,14 @@
"""Connection between the remote server and feather""" """Connection between the remote server and feather"""
from __future__ import annotations
import re import re
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from ttrss.client import TTRClient from ttrss.client import TTRClient
import google_reader import google_reader
from feather.config import Config from feather.config import Config
from feather.articledata import Article, GReaderArticle, TTRArticle, ArticleId, Category, CategoryId from feather.data import Article, ArticleId, Category
class ClientSession(ABC): class ClientSession(ABC):
config: Config config: Config
@ -22,7 +24,7 @@ class ClientSession(ABC):
pass pass
@abstractmethod @abstractmethod
def get_unread_articles_in_category(self, category_id: CategoryId, limit: int, continuation: int=0) -> list[Article]: def get_articles_in_category(self, category: Category, limit: int, continuation: int = 0, unread_only: bool = False) -> list[Article]:
"""Returns a list of Articles in the given category. limit and continuation are required for pagination.""" """Returns a list of Articles in the given category. limit and continuation are required for pagination."""
pass pass
@ -51,11 +53,37 @@ class GReaderSession(ClientSession):
l.append(Category(id=category_id, title=category_name)) l.append(Category(id=category_id, title=category_name))
return l return l
def get_unread_articles_in_category(self, category, limit=500, continuation=0) -> list[GReaderArticle]: def get_articles_in_category(self, category: Category, limit: int = 1000, continuation: int = 0, unread_only: bool = False) -> list[GReaderArticle]:
items_ids = self.greader.get_stream_items_ids(self.auth_token, stream_id=category.id, exclude_target="user/-/state/com.google/read", limit=limit, continuation=continuation) items_ids = self.greader.get_stream_items_ids(
self.auth_token,
stream_id=category.id,
exclude_target="user/-/state/com.google/read" if unread_only else None,
limit=limit,
continuation=continuation,
)
item_contents = self.greader.get_stream_items_contents(self.auth_token, self.csrf_token, item_ids=[item.id for item in items_ids.item_refs]) item_contents = self.greader.get_stream_items_contents(self.auth_token, self.csrf_token, item_ids=[item.id for item in items_ids.item_refs])
return [ GReaderArticle(self, category, item_content) for item_content in item_contents.items ] return [ GReaderArticle(self, category, item_content) for item_content in item_contents.items ]
class GReaderArticle(Article):
def __init__(self, session: GReaderSession, category: Category, item_content):
self.config = session.config
self.id = item_content.id
self.category = category
self.unread = "user/-/state/com.google/read" not in item_content.categories
self.title = item_content.title
self.published = item_content.published
self.updated = item_content.updated
self.author = item_content.author
self.summary = item_content.summary.content
self.content = item_content.content.content
self.feed_title = item_content.origin.title
self.feed_url = item_content.origin.html_url
self.article_url = item_content.canonical[0].href
self.compute_fields()
class TTRSession(ClientSession): class TTRSession(ClientSession):
"""Tiny Tiny RSS API client""" """Tiny Tiny RSS API client"""
ttrss: TTRClient ttrss: TTRClient
@ -93,6 +121,41 @@ class TTRSession(ClientSession):
tree = self.ttrss.get_feed_tree() tree = self.ttrss.get_feed_tree()
return get_categories_recursive(tree["categories"]) return get_categories_recursive(tree["categories"])
def get_unread_articles_in_category(self, category, limit=100, continuation=0) -> list[TTRArticle]: def get_articles_in_category(self, category: Category, limit: int = 200, continuation: int = 0, unread_only: bool = False) -> list[TTRArticle]:
headlines = self.ttrss.get_headlines(feed_id=category.id, limit=limit, skip=continuation, is_cat=True, show_excerpt=True, show_content=True, view_mode="unread", include_attachments=False, include_nested=False) headlines = self.ttrss.get_headlines(
feed_id=category.id,
limit=limit,
skip=continuation,
is_cat=True,
show_excerpt=True,
show_content=True,
view_mode="unread" if unread_only else "all_articles",
include_attachments=False,
include_nested=False,
)
return [ TTRArticle(self, category, headline) for headline in headlines ] return [ TTRArticle(self, category, headline) for headline in headlines ]
class TTRArticle(Article):
def __init__(self, session: TTRSession, category: Category, article):
self.config = session.config
self.id = article.id
self.category = category
self.unread = article.unread
self.title = article.title
self.published = article.updated.timestamp()
self.updated = article.updated.timestamp()
self.author = article.author
self.summary = article.excerpt
self.content = article.content
self.feed_title = article.feed_title
self.feed_url = article.site_url
self.feed_icon_url = session.feeds[article.feed_id]["icon"]
self.feed_order = session.feeds[article.feed_id]["order"]
self.article_url = article.link
self.comments_url = article.comments_link
self.language = article.lang
self.image_url = article.flavor_image
self.compute_fields()

View file

@ -1,3 +1,8 @@
# Feather default configuration file.
# You can override any of these values by either:
# - creating a config.toml file containing your user configuration. You can choose another filename by setting the CONFIG_PATH environment variable.
# - setting environment variables for the values you want to override (the environment variable name for each value can be found in the comments below).
[server] [server]
# Server API to use. Either "googlereader" for the Google Reader API (FreshRSS, Miniflux, etc.) or "ttrss" for the TinyTiny-RSS API. # Server API to use. Either "googlereader" for the Google Reader API (FreshRSS, Miniflux, etc.) or "ttrss" for the TinyTiny-RSS API.
# The Google Reader API does not support nested categories. # The Google Reader API does not support nested categories.
@ -12,11 +17,15 @@ user = "username"
# Can be set through the environment variable SERVER_PASSWORD. # Can be set through the environment variable SERVER_PASSWORD.
password = "password" password = "password"
# How many items to retrieve at most from the server in a single request. Lower values will make synchronization slower, higher values might make the server complain. # How many items to retrieve at most from the server in a single request. Lower values will make synchronization slower, higher values might make the server complain.
# If you are missing articles after a sync, it might be because this value is too high.
# If you are using the Google Reader API: servers should be okay with up to 1000. # If you are using the Google Reader API: servers should be okay with up to 1000.
# If you are using the ttrss API: servers should be okay with up to 200. # If you are using the ttrss API: servers should be okay with up to 200.
# Set to 0 to let feather choose. # Set to 0 to let feather choose (200 for ttrss, 1000 for googlereader).
# Can be set through the environment variable SERVER_ITEMS_PER_REQUEST. # Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST.
items_per_request = 0 articles_per_request = 0
# Set to true to only sync unread articles; feather will not retrieve or store any read article.
# Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES.
only_sync_unread_articles = true
[directories] [directories]
# Directory path where the internal feather data will be stored. # Directory path where the internal feather data will be stored.
@ -27,9 +36,28 @@ data = "data"
reader = "reader" reader = "reader"
[html] [html]
# HTML template used for generating item HTML files. All templates are Jinja2 templates. # HTML template used for generating article HTML files. All templates are Jinja2 templates.
# Available fields:
# - id: article id (int | str)
# - title: article title (str)
# - published: article publication time (timestamp) (int)
# - published_formatted: article publication time (text) (str)
# - updated: article update time (timestamp) (int)
# - updated_formatted: article update time (text) (str)
# - author: article author (str)
# - summary: article summary (HTML) (str)
# - content: article content (HTML) (str)
# - feed_title: feed title (str)
# - feed_url: feed URL (str)
# - feed_icon_url: feed icon URL (str)
# - feed_order: feed display order, starting from 1 (0 if unknown) (int)
# - article_url: article URL (str)
# - comments_url: article comments URL (str)
# - language: article language (str)
# - image_url: article main image (str)
# - category: feed category (Category)
# Can be set through the environment variable HTML_TEMPLATE. # Can be set through the environment variable HTML_TEMPLATE.
template = ''' article_template = '''
<!doctype html> <!doctype html>
<html lang="en-US"> <html lang="en-US">
<head> <head>
@ -68,9 +96,15 @@ template = '''
</html> </html>
''' '''
# Filename template for generated HTML files. # Filename template for generated HTML files.
# The available fields are the same as for article_template.
# Can be set through the environment variable HTML_FILENAME_TEMPLATE. # Can be set through the environment variable HTML_FILENAME_TEMPLATE.
filename_template = "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html" filename_template = "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html"
# Category directory name template for generated HTML files. # Category directory name template for generated HTML files.
# Available fields:
# - id: category id (str | int)
# - title: category name (str)
# - parents: list of parent categories (list[Category])
# - order: category display order, starting from 1 (0 if unknown) (int)
# Can be set through the environment variable HTML_CATEGORY_TEMPLATE. # Can be set through the environment variable HTML_CATEGORY_TEMPLATE.
category_template = "{% if order %}{{ '%02d' % order }} {% endif %}{{ title }}" category_template = "{% if order %}{{ '%02d' % order }} {% endif %}{{ title }}"
# Maximum allowed filename length (in bytes assuming UTF-8 encoding) before truncating. Depending on your filesystem filename's limits it may be possible to increase the value, ask Wikipedia for details. # Maximum allowed filename length (in bytes assuming UTF-8 encoding) before truncating. Depending on your filesystem filename's limits it may be possible to increase the value, ask Wikipedia for details.

View file

@ -43,20 +43,21 @@ class Config:
self.server_api: str = str(get_config("server", "api")) self.server_api: str = str(get_config("server", "api"))
if self.server_api not in ("googlereader", "ttrss"): if self.server_api not in ("googlereader", "ttrss"):
raise ConfigurationError(f"server.api must be either ttrss or googlereader") raise ConfigurationError(f"server.api must be either ttrss or googlereader, not {self.server_api}")
self.server_url: str = str(get_config("server", "url", False)) self.server_url: str = str(get_config("server", "url", False))
self.server_user: str = str(get_config("server", "user", False)) self.server_user: str = str(get_config("server", "user", False))
self.server_password: str = str(get_config("server", "password", False)) self.server_password: str = str(get_config("server", "password", False))
self.items_per_query: int = int(get_config("server", "items_per_request")) self.articles_per_query: int = int(get_config("server", "articles_per_request"))
if self.items_per_query == 0: if self.articles_per_query == 0:
self.items_per_query = 1000 if self.server_api == "googlereader" else 200 self.articles_per_query = 1000 if self.server_api == "googlereader" else 200
self.only_sync_unread_articles: bool = bool(get_config("server", "only_sync_unread_articles"))
self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone"))) self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
self.time_format: str = str(get_config("datetime", "format")) self.time_format: str = str(get_config("datetime", "format"))
self.item_template: Template = Template(str(get_config("html", "template")), autoescape=True) self.article_template: Template = Template(str(get_config("html", "article_template")), autoescape=True)
self.item_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False) self.article_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False)
self.item_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False) self.article_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False)
self.max_filename_length: int = int(get_config("html", "max_filename_length")) self.max_filename_length: int = int(get_config("html", "max_filename_length"))
self.filename_translation = str.maketrans(get_config("html", "filename_replacement")) self.filename_translation = str.maketrans(get_config("html", "filename_replacement"))

View file

@ -4,7 +4,7 @@ from __future__ import annotations
import os import os
import json import json
from abc import ABC, abstractmethod from abc import ABC
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from hashlib import sha256 from hashlib import sha256
@ -58,16 +58,22 @@ type ArticleId = int | str
class Article(ABC): class Article(ABC):
config: Config config: Config
json_path: Path json_path: Path
html_path: str
# fields serialized into the JSON file #
# no default value
id: ArticleId # article id id: ArticleId # article id
category: Category # feed category
# no default value, computed by compute_fields
published_formatted: str # article publication time (text)
updated_formatted: str # article update time (text)
html_path: str # html path, relative to the html_root directory
# with default value
unread: bool = True # if the article is unread
title: str = "" # article title title: str = "" # article title
published: int = 0 # article publication time (timestamp) published: int = 0 # article publication time (timestamp)
published_formatted: str # article publication time (text)
updated: int = 0 # article update time (timestamp) updated: int = 0 # article update time (timestamp)
updated_formatted: str # article publication time (text)
author: str = "" # article author author: str = "" # article author
summary: str = "" # article summary (HTML) summary: str = "" # article summary (HTML)
content: str = "" # article content (HTML) content: str = "" # article content (HTML)
@ -79,16 +85,15 @@ class Article(ABC):
comments_url: str = "" # article comments URL comments_url: str = "" # article comments URL
language: str = "" # article language language: str = "" # article language
image_url: str = "" # article main image image_url: str = "" # article main image
category: Category # feed category
def get_html_path(self): def get_html_path(self):
config = self.config config = self.config
category_directory = config.html_root category_directory = config.html_root
for category in self.category.parents: for category in self.category.parents:
category_directory /= escape_filename(config, config.item_category_template.render(category.asdict())) category_directory /= escape_filename(config, config.article_category_template.render(category.asdict()))
category_directory /= escape_filename(config, config.item_category_template.render(self.category.asdict())) category_directory /= escape_filename(config, config.article_category_template.render(self.category.asdict()))
html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(self.get_template_dict()))) html_name = truncate_filename(config, escape_filename(config, config.article_filename_template.render(self.get_template_dict())))
return category_directory / html_name return category_directory / html_name
@ -100,13 +105,13 @@ class Article(ABC):
self.html_path = str(self.get_html_path().relative_to(config.html_root)) # TODO: do this dynamically on write, handle overwrite conflict at the same time self.html_path = str(self.get_html_path().relative_to(config.html_root)) # TODO: do this dynamically on write, handle overwrite conflict at the same time
def get_template_dict(self) -> dict: def get_template_dict(self) -> dict:
template_fields = ("id", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url") template_fields = ("id", "unread", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url")
d = { field: getattr(self, field) for field in template_fields } d = { field: getattr(self, field) for field in template_fields }
d["category"] = self.category.asdict() d["category"] = self.category.asdict()
return d return d
def write_json(self): def write_json(self):
stored_fields = ("id", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url", "html_path") stored_fields = ("id", "unread", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url", "html_path")
item_json = { field: getattr(self, field) for field in stored_fields } item_json = { field: getattr(self, field) for field in stored_fields }
item_json["category"] = self.category.asdict() item_json["category"] = self.category.asdict()
if self.json_path.exists(): if self.json_path.exists():
@ -125,7 +130,7 @@ class Article(ABC):
else: else:
html_path.parent.mkdir(parents=True, exist_ok=True) html_path.parent.mkdir(parents=True, exist_ok=True)
with html_path.open("w") as f: with html_path.open("w") as f:
f.write(config.item_template.render(self.get_template_dict())) f.write(config.article_template.render(self.get_template_dict()))
# set accessed date to update time, modified to publication time # set accessed date to update time, modified to publication time
os.utime(html_path, (max(self.updated, self.updated), self.published)) os.utime(html_path, (max(self.updated, self.updated), self.published))
def delete_html(self, ignore_deleted=False): def delete_html(self, ignore_deleted=False):
@ -145,53 +150,9 @@ class Article(ABC):
self.compute_fields() # recompute formatted datetime & paths from the current configuration self.compute_fields() # recompute formatted datetime & paths from the current configuration
self.write() # rewrite JSON & HTML self.write() # rewrite JSON & HTML
class GReaderArticle(Article):
def __init__(self, session: GReaderSession, category: Category, item_content):
self.config = session.config
self.category = category
self.id = item_content.id
self.title = item_content.title
self.published = item_content.published
self.updated = item_content.updated
self.author = item_content.author
self.summary = item_content.summary.content
self.content = item_content.content.content
self.feed_title = item_content.origin.title
self.feed_url = item_content.origin.html_url
self.article_url = item_content.canonical[0].href
self.compute_fields()
class TTRArticle(Article):
def __init__(self, session: TRRSession, category: Category, article):
self.config = session.config
self.category = category
self.id = article.id
self.title = article.title
self.published = article.updated.timestamp()
self.updated = article.updated.timestamp()
self.author = article.author
self.summary = article.excerpt
self.content = article.content
self.feed_title = article.feed_title
self.feed_url = article.site_url
self.feed_icon_url = session.feeds[article.feed_id]["icon"]
self.feed_order = session.feeds[article.feed_id]["order"]
self.article_url = article.link
self.comments_url = article.comments_link
self.language = article.lang
self.image_url = article.flavor_image
self.compute_fields()
class FileArticle(Article): class FileArticle(Article):
def __init__(self, config: Config, json_path: Path) -> Article: def __init__(self, config: Config, json_path: Path) -> Article:
self.config = config self.config = config
self.json_path = json_path self.json_path = json_path
item_json = json.load(json_path.open("r")) item_json = json.load(json_path.open("r"))

View file

@ -4,8 +4,8 @@ import asyncio
import signal import signal
from feather.config import Config from feather.config import Config
from feather.feedreaderclient import GReaderSession, TTRSession, ClientSession from feather.client import GReaderSession, TTRSession, ClientSession
from feather.articledata import FileArticle from feather.data import FileArticle
class FeatherApp: class FeatherApp:
config: Config config: Config
@ -64,8 +64,8 @@ class FeatherApp:
article.delete() article.delete()
marked_as_read += 1 marked_as_read += 1
for i in range(0, len(to_mark_as_read), config.items_per_query): for i in range(0, len(to_mark_as_read), config.articles_per_query):
client_session.mark_as_read(to_mark_as_read[i:i+config.items_per_query]) client_session.mark_as_read(to_mark_as_read[i:i+config.articles_per_query])
print(f"Marked {marked_as_read} items as read") print(f"Marked {marked_as_read} items as read")
@ -86,8 +86,8 @@ class FeatherApp:
remaining, continuation = True, 0 remaining, continuation = True, 0
while remaining: while remaining:
articles = client_session.get_unread_articles_in_category(category, limit=config.items_per_query, continuation=continuation) articles = client_session.get_articles_in_category(category, limit=config.articles_per_query, continuation=continuation, unread_only=config.only_sync_unread_articles)
if len(articles) >= config.items_per_query: if len(articles) >= config.articles_per_query:
continuation += len(articles) continuation += len(articles)
else: else:
remaining = False remaining = False