mirror of
https://codeberg.org/Reuh/feather.git
synced 2025-10-27 18:19:32 +00:00
feat: allow retrieving read articles
This commit is contained in:
parent
b0e0c5d0df
commit
07e9d208b1
6 changed files with 140 additions and 81 deletions
|
|
@ -62,6 +62,6 @@ You need Python 3.12 or newer. Then pip it up, as the kids say.
|
|||
- [ ] Use inotify for real-time article mark-as-read action
|
||||
- [ ] Share the fun somewhere
|
||||
- [ ] Actually think about the issues created by the duplicate warning
|
||||
- [ ] Attachments
|
||||
- [ ] Get article attachments
|
||||
- [ ] Test with FreshRSS
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,14 @@
|
|||
"""Connection between the remote server and feather"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from ttrss.client import TTRClient
|
||||
import google_reader
|
||||
|
||||
from feather.config import Config
|
||||
from feather.articledata import Article, GReaderArticle, TTRArticle, ArticleId, Category, CategoryId
|
||||
from feather.data import Article, ArticleId, Category
|
||||
|
||||
class ClientSession(ABC):
|
||||
config: Config
|
||||
|
|
@ -22,7 +24,7 @@ class ClientSession(ABC):
|
|||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_unread_articles_in_category(self, category_id: CategoryId, limit: int, continuation: int=0) -> list[Article]:
|
||||
def get_articles_in_category(self, category: Category, limit: int, continuation: int = 0, unread_only: bool = False) -> list[Article]:
|
||||
"""Returns a list of Articles in the given category. limit and continuation are required for pagination."""
|
||||
pass
|
||||
|
||||
|
|
@ -51,11 +53,37 @@ class GReaderSession(ClientSession):
|
|||
l.append(Category(id=category_id, title=category_name))
|
||||
return l
|
||||
|
||||
def get_unread_articles_in_category(self, category, limit=500, continuation=0) -> list[GReaderArticle]:
|
||||
items_ids = self.greader.get_stream_items_ids(self.auth_token, stream_id=category.id, exclude_target="user/-/state/com.google/read", limit=limit, continuation=continuation)
|
||||
def get_articles_in_category(self, category: Category, limit: int = 1000, continuation: int = 0, unread_only: bool = False) -> list[GReaderArticle]:
|
||||
items_ids = self.greader.get_stream_items_ids(
|
||||
self.auth_token,
|
||||
stream_id=category.id,
|
||||
exclude_target="user/-/state/com.google/read" if unread_only else None,
|
||||
limit=limit,
|
||||
continuation=continuation,
|
||||
)
|
||||
item_contents = self.greader.get_stream_items_contents(self.auth_token, self.csrf_token, item_ids=[item.id for item in items_ids.item_refs])
|
||||
return [ GReaderArticle(self, category, item_content) for item_content in item_contents.items ]
|
||||
|
||||
class GReaderArticle(Article):
|
||||
def __init__(self, session: GReaderSession, category: Category, item_content):
|
||||
self.config = session.config
|
||||
|
||||
self.id = item_content.id
|
||||
self.category = category
|
||||
|
||||
self.unread = "user/-/state/com.google/read" not in item_content.categories
|
||||
self.title = item_content.title
|
||||
self.published = item_content.published
|
||||
self.updated = item_content.updated
|
||||
self.author = item_content.author
|
||||
self.summary = item_content.summary.content
|
||||
self.content = item_content.content.content
|
||||
self.feed_title = item_content.origin.title
|
||||
self.feed_url = item_content.origin.html_url
|
||||
self.article_url = item_content.canonical[0].href
|
||||
|
||||
self.compute_fields()
|
||||
|
||||
class TTRSession(ClientSession):
|
||||
"""Tiny Tiny RSS API client"""
|
||||
ttrss: TTRClient
|
||||
|
|
@ -93,6 +121,41 @@ class TTRSession(ClientSession):
|
|||
tree = self.ttrss.get_feed_tree()
|
||||
return get_categories_recursive(tree["categories"])
|
||||
|
||||
def get_unread_articles_in_category(self, category, limit=100, continuation=0) -> list[TTRArticle]:
|
||||
headlines = self.ttrss.get_headlines(feed_id=category.id, limit=limit, skip=continuation, is_cat=True, show_excerpt=True, show_content=True, view_mode="unread", include_attachments=False, include_nested=False)
|
||||
def get_articles_in_category(self, category: Category, limit: int = 200, continuation: int = 0, unread_only: bool = False) -> list[TTRArticle]:
|
||||
headlines = self.ttrss.get_headlines(
|
||||
feed_id=category.id,
|
||||
limit=limit,
|
||||
skip=continuation,
|
||||
is_cat=True,
|
||||
show_excerpt=True,
|
||||
show_content=True,
|
||||
view_mode="unread" if unread_only else "all_articles",
|
||||
include_attachments=False,
|
||||
include_nested=False,
|
||||
)
|
||||
return [ TTRArticle(self, category, headline) for headline in headlines ]
|
||||
|
||||
class TTRArticle(Article):
|
||||
def __init__(self, session: TTRSession, category: Category, article):
|
||||
self.config = session.config
|
||||
|
||||
self.id = article.id
|
||||
self.category = category
|
||||
|
||||
self.unread = article.unread
|
||||
self.title = article.title
|
||||
self.published = article.updated.timestamp()
|
||||
self.updated = article.updated.timestamp()
|
||||
self.author = article.author
|
||||
self.summary = article.excerpt
|
||||
self.content = article.content
|
||||
self.feed_title = article.feed_title
|
||||
self.feed_url = article.site_url
|
||||
self.feed_icon_url = session.feeds[article.feed_id]["icon"]
|
||||
self.feed_order = session.feeds[article.feed_id]["order"]
|
||||
self.article_url = article.link
|
||||
self.comments_url = article.comments_link
|
||||
self.language = article.lang
|
||||
self.image_url = article.flavor_image
|
||||
|
||||
self.compute_fields()
|
||||
|
|
@ -1,3 +1,8 @@
|
|||
# Feather default configuration file.
|
||||
# You can overwrite any of these values by either:
|
||||
# - creating a config.toml file containing your user configuration. You can choose another filename by setting the CONFIG_PATH environment variable.
|
||||
# - setting environment variables for the values you want to overwrite (the environment variable name for each value can be found in the comments below).
|
||||
|
||||
[server]
|
||||
# Server API to use. Either "googlereader" for the Google Reader API (FreshRSS, Miniflux, etc.) or "ttrss" for the Tiny Tiny RSS API.
|
||||
# The Google Reader API does not support nested categories.
|
||||
|
|
@ -12,11 +17,15 @@ user = "username"
|
|||
# Can be set through the environment variable SERVER_PASSWORD.
|
||||
password = "password"
|
||||
# How many items to retrieve at most from the server in a single request. Lower values will make synchronization slower, higher values might make the server complain.
|
||||
# If you are missing articles after a sync, it might be because this value is too high.
|
||||
# If you are using the Google Reader API: servers should be okay with up to 1000.
|
||||
# If you are using the ttrss API: servers should be okay with up to 200.
|
||||
# Set to 0 to let feather choose.
|
||||
# Can be set through the environment variable SERVER_ITEMS_PER_REQUEST.
|
||||
items_per_request = 0
|
||||
# Set to 0 to let feather choose (200 for ttrss, 1000 for googlereader).
|
||||
# Can be set through the environment variable SERVER_ARTICLES_PER_REQUEST.
|
||||
articles_per_request = 0
|
||||
# Set to true to only sync unread articles; feather will not retrieve or store any read article.
|
||||
# Can be set through the environment variable SERVER_ONLY_SYNC_UNREAD_ARTICLES.
|
||||
only_sync_unread_articles = true
|
||||
|
||||
[directories]
|
||||
# Directory path where the internal feather data will be stored.
|
||||
|
|
@ -27,9 +36,28 @@ data = "data"
|
|||
reader = "reader"
|
||||
|
||||
[html]
|
||||
# HTML template used for generating item HTML files. All templates are Jinja2 templates.
|
||||
# HTML template used for generating article HTML files. All templates are Jinja2 templates.
|
||||
# Available fields:
|
||||
# - id: article id (int | str)
# - unread: whether the article is unread (bool)
|
||||
# - title: article title (str)
|
||||
# - published: article publication time (timestamp) (int)
|
||||
# - published_formatted: article publication time (text) (str)
|
||||
# - updated: article update time (timestamp) (int)
|
||||
# - updated_formatted: article update time (text) (str)
|
||||
# - author: article author (str)
|
||||
# - summary: article summary (HTML) (str)
|
||||
# - content: article content (HTML) (str)
|
||||
# - feed_title: feed title (str)
|
||||
# - feed_url: feed URL (str)
|
||||
# - feed_icon_url: feed icon URL (str)
|
||||
# - feed_order: feed display order, starting from 1 (0 if unknown) (int)
|
||||
# - article_url: article URL (str)
|
||||
# - comments_url: article comments URL (str)
|
||||
# - language: article language (str)
|
||||
# - image_url: article main image (str)
|
||||
# - category: feed category (Category)
|
||||
# Can be set through the environment variable HTML_TEMPLATE.
|
||||
template = '''
|
||||
article_template = '''
|
||||
<!doctype html>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
|
|
@ -68,9 +96,15 @@ template = '''
|
|||
</html>
|
||||
'''
|
||||
# Filename template for generated HTML files.
|
||||
# The available fields are the same as for article_template.
|
||||
# Can be set through the environment variable HTML_FILENAME_TEMPLATE.
|
||||
filename_template = "[{{ feed_title }}]\t{{ title }} ({{ published_formatted }}).html"
|
||||
# Category directory name template for generated HTML files.
|
||||
# Available fields:
|
||||
# - id: category id (str | int)
|
||||
# - title: category name (str)
|
||||
# - parents: list of parent categories (list[Category])
|
||||
# - order: category display order, starting from 1 (0 if unknown) (int)
|
||||
# Can be set through the environment variable HTML_CATEGORY_TEMPLATE.
|
||||
category_template = "{% if order %}{{ '%02d' % order }} {% endif %}{{ title }}"
|
||||
# Maximum allowed filename length (in bytes, assuming UTF-8 encoding) before truncating. Depending on your filesystem's filename length limit, it may be possible to increase this value; see Wikipedia for details.
|
||||
|
|
|
|||
|
|
@ -43,20 +43,21 @@ class Config:
|
|||
|
||||
self.server_api: str = str(get_config("server", "api"))
|
||||
if self.server_api not in ("googlereader", "ttrss"):
|
||||
raise ConfigurationError(f"server.api must be either ttrss or googlereader")
|
||||
raise ConfigurationError(f"server.api must be either ttrss or googlereader, not {self.server_api}")
|
||||
self.server_url: str = str(get_config("server", "url", False))
|
||||
self.server_user: str = str(get_config("server", "user", False))
|
||||
self.server_password: str = str(get_config("server", "password", False))
|
||||
self.items_per_query: int = int(get_config("server", "items_per_request"))
|
||||
if self.items_per_query == 0:
|
||||
self.items_per_query = 1000 if self.server_api == "googlereader" else 200
|
||||
self.articles_per_query: int = int(get_config("server", "articles_per_request"))
|
||||
if self.articles_per_query == 0:
|
||||
self.articles_per_query = 1000 if self.server_api == "googlereader" else 200
|
||||
self.only_sync_unread_articles: bool = bool(get_config("server", "only_sync_unread_articles"))
|
||||
|
||||
self.timezone: ZoneInfo = ZoneInfo(str(get_config("datetime", "timezone")))
|
||||
self.time_format: str = str(get_config("datetime", "format"))
|
||||
|
||||
self.item_template: Template = Template(str(get_config("html", "template")), autoescape=True)
|
||||
self.item_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False)
|
||||
self.item_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False)
|
||||
self.article_template: Template = Template(str(get_config("html", "article_template")), autoescape=True)
|
||||
self.article_filename_template: Template = Template(str(get_config("html", "filename_template")), autoescape=False)
|
||||
self.article_category_template: Template = Template(str(get_config("html", "category_template")), autoescape=False)
|
||||
self.max_filename_length: int = int(get_config("html", "max_filename_length"))
|
||||
self.filename_translation = str.maketrans(get_config("html", "filename_replacement"))
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from __future__ import annotations
|
|||
|
||||
import os
|
||||
import json
|
||||
from abc import ABC, abstractmethod
|
||||
from abc import ABC
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from hashlib import sha256
|
||||
|
|
@ -58,16 +58,22 @@ type ArticleId = int | str
|
|||
|
||||
class Article(ABC):
|
||||
config: Config
|
||||
|
||||
json_path: Path
|
||||
html_path: str
|
||||
|
||||
# fields serialized into the JSON file #
|
||||
|
||||
# no default value
|
||||
id: ArticleId # article id
|
||||
category: Category # feed category
|
||||
# no default value, computed by compute_fields
|
||||
published_formatted: str # article publication time (text)
|
||||
updated_formatted: str # article update time (text)
|
||||
html_path: str # html path, relative to the html_root directory
|
||||
# with default value
|
||||
unread: bool = True # if the article is unread
|
||||
title: str = "" # article title
|
||||
published: int = 0 # article publication time (timestamp)
|
||||
published_formatted: str # article publication time (text)
|
||||
updated: int = 0 # article update time (timestamp)
|
||||
updated_formatted: str # article publication time (text)
|
||||
author: str = "" # article author
|
||||
summary: str = "" # article summary (HTML)
|
||||
content: str = "" # article content (HTML)
|
||||
|
|
@ -79,16 +85,15 @@ class Article(ABC):
|
|||
comments_url: str = "" # article comments URL
|
||||
language: str = "" # article language
|
||||
image_url: str = "" # article main image
|
||||
category: Category # feed category
|
||||
|
||||
def get_html_path(self):
|
||||
config = self.config
|
||||
category_directory = config.html_root
|
||||
for category in self.category.parents:
|
||||
category_directory /= escape_filename(config, config.item_category_template.render(category.asdict()))
|
||||
category_directory /= escape_filename(config, config.item_category_template.render(self.category.asdict()))
|
||||
category_directory /= escape_filename(config, config.article_category_template.render(category.asdict()))
|
||||
category_directory /= escape_filename(config, config.article_category_template.render(self.category.asdict()))
|
||||
|
||||
html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(self.get_template_dict())))
|
||||
html_name = truncate_filename(config, escape_filename(config, config.article_filename_template.render(self.get_template_dict())))
|
||||
|
||||
return category_directory / html_name
|
||||
|
||||
|
|
@ -100,13 +105,13 @@ class Article(ABC):
|
|||
self.html_path = str(self.get_html_path().relative_to(config.html_root)) # TODO: do this dynamically on write, handle overwrite conflict at the same time
|
||||
|
||||
def get_template_dict(self) -> dict:
|
||||
template_fields = ("id", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url")
|
||||
template_fields = ("id", "unread", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url")
|
||||
d = { field: getattr(self, field) for field in template_fields }
|
||||
d["category"] = self.category.asdict()
|
||||
return d
|
||||
|
||||
def write_json(self):
|
||||
stored_fields = ("id", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url", "html_path")
|
||||
stored_fields = ("id", "unread", "title", "published", "published_formatted", "updated", "updated_formatted", "author", "summary", "content", "feed_title", "feed_url", "feed_icon_url", "feed_order", "article_url", "comments_url", "language", "image_url", "html_path")
|
||||
item_json = { field: getattr(self, field) for field in stored_fields }
|
||||
item_json["category"] = self.category.asdict()
|
||||
if self.json_path.exists():
|
||||
|
|
@ -125,7 +130,7 @@ class Article(ABC):
|
|||
else:
|
||||
html_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with html_path.open("w") as f:
|
||||
f.write(config.item_template.render(self.get_template_dict()))
|
||||
f.write(config.article_template.render(self.get_template_dict()))
|
||||
# set accessed date to update time, modified to publication time
|
||||
os.utime(html_path, (max(self.updated, self.updated), self.published))
|
||||
def delete_html(self, ignore_deleted=False):
|
||||
|
|
@ -145,53 +150,9 @@ class Article(ABC):
|
|||
self.compute_fields() # recompute formatted datetime & paths from the current configuration
|
||||
self.write() # rewrite JSON & HTML
|
||||
|
||||
class GReaderArticle(Article):
|
||||
def __init__(self, session: GReaderSession, category: Category, item_content):
|
||||
self.config = session.config
|
||||
|
||||
self.category = category
|
||||
|
||||
self.id = item_content.id
|
||||
self.title = item_content.title
|
||||
self.published = item_content.published
|
||||
self.updated = item_content.updated
|
||||
self.author = item_content.author
|
||||
self.summary = item_content.summary.content
|
||||
self.content = item_content.content.content
|
||||
self.feed_title = item_content.origin.title
|
||||
self.feed_url = item_content.origin.html_url
|
||||
self.article_url = item_content.canonical[0].href
|
||||
|
||||
self.compute_fields()
|
||||
|
||||
class TTRArticle(Article):
|
||||
def __init__(self, session: TRRSession, category: Category, article):
|
||||
self.config = session.config
|
||||
|
||||
self.category = category
|
||||
|
||||
self.id = article.id
|
||||
self.title = article.title
|
||||
self.published = article.updated.timestamp()
|
||||
self.updated = article.updated.timestamp()
|
||||
self.author = article.author
|
||||
self.summary = article.excerpt
|
||||
self.content = article.content
|
||||
self.feed_title = article.feed_title
|
||||
self.feed_url = article.site_url
|
||||
self.feed_icon_url = session.feeds[article.feed_id]["icon"]
|
||||
self.feed_order = session.feeds[article.feed_id]["order"]
|
||||
self.article_url = article.link
|
||||
self.comments_url = article.comments_link
|
||||
self.language = article.lang
|
||||
self.image_url = article.flavor_image
|
||||
|
||||
self.compute_fields()
|
||||
|
||||
class FileArticle(Article):
|
||||
def __init__(self, config: Config, json_path: Path) -> Article:
|
||||
self.config = config
|
||||
|
||||
self.json_path = json_path
|
||||
|
||||
item_json = json.load(json_path.open("r"))
|
||||
|
|
@ -4,8 +4,8 @@ import asyncio
|
|||
import signal
|
||||
|
||||
from feather.config import Config
|
||||
from feather.feedreaderclient import GReaderSession, TTRSession, ClientSession
|
||||
from feather.articledata import FileArticle
|
||||
from feather.client import GReaderSession, TTRSession, ClientSession
|
||||
from feather.data import FileArticle
|
||||
|
||||
class FeatherApp:
|
||||
config: Config
|
||||
|
|
@ -64,8 +64,8 @@ class FeatherApp:
|
|||
article.delete()
|
||||
marked_as_read += 1
|
||||
|
||||
for i in range(0, len(to_mark_as_read), config.items_per_query):
|
||||
client_session.mark_as_read(to_mark_as_read[i:i+config.items_per_query])
|
||||
for i in range(0, len(to_mark_as_read), config.articles_per_query):
|
||||
client_session.mark_as_read(to_mark_as_read[i:i+config.articles_per_query])
|
||||
|
||||
print(f"Marked {marked_as_read} items as read")
|
||||
|
||||
|
|
@ -86,8 +86,8 @@ class FeatherApp:
|
|||
|
||||
remaining, continuation = True, 0
|
||||
while remaining:
|
||||
articles = client_session.get_unread_articles_in_category(category, limit=config.items_per_query, continuation=continuation)
|
||||
if len(articles) >= config.items_per_query:
|
||||
articles = client_session.get_articles_in_category(category, limit=config.articles_per_query, continuation=continuation, unread_only=config.only_sync_unread_articles)
|
||||
if len(articles) >= config.articles_per_query:
|
||||
continuation += len(articles)
|
||||
else:
|
||||
remaining = False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue