1
0
Fork 0
mirror of https://codeberg.org/Reuh/feather.git synced 2025-10-27 10:09:32 +00:00

Initial commit

This commit is contained in:
Étienne Fildadut 2025-10-09 13:50:18 +02:00
commit 824d0ad839
8 changed files with 1054 additions and 0 deletions

17
.gitignore vendored Normal file
View file

@ -0,0 +1,17 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv
# Config file
config.toml
# Runtime files
reader/
data/

1
.python-version Normal file
View file

@ -0,0 +1 @@
3.12

0
README.md Normal file
View file

23
config.default.toml Normal file
View file

@ -0,0 +1,23 @@
[server]
# (Required) URL of your server's Google Reader API endpoint
url = "https://rss.example.com/"
# (Required) Username/email address used to connect to the server
user = "username"
# (Required) Password/API password used to connect to the server
password = "password"
# How many items to retrieve at most from the server in a single request. Lower values will make synchronization slower, higher values might make the server complain.
# Most servers are supposedly okay with up to 1000, but tt-rss complained so I dropped it to 500 here.
items_per_request = 500
[directories]
# Directory path where the internal feather data will be stored.
data = "data"
# Directory path where the user-facing files will be stored.
reader = "reader"
[time]
# Which timezone to use when writing date and time.
timezone = "Etc/UTC"
# How date and time are formatted. See https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior for the supported codes.
# This will be used in filenames so it's a good idea to use something sortable...
format = "%Y-%m-%d %H:%M"

666
google_reader.py Normal file
View file

@ -0,0 +1,666 @@
from dataclasses import dataclass
from typing import Literal
import requests
# Streams can be feeds, tags (folders) or system types.
# These templates/constants are the stream IDs passed as the `s`, `xt` and
# `it` parameters of the Google Reader API (see Client methods below).
STREAM_FEED = "feed/{feed_id}"  # format with a concrete feed_id before use
STREAM_TAG = "user/-/label/{label_title}"  # format with a label title (see get_label_id)
STREAM_READ = "user/-/state/com.google/read"
STREAM_STARRED = "user/-/state/com.google/starred"
STREAM_KEPT_UNREAD = "user/-/state/com.google/kept-unread"
STREAM_BROADCAST = "user/-/state/com.google/broadcast"
STREAM_READING_LIST = "user/-/state/com.google/reading-list"
class ClientError(Exception):
    """Base class for Google Reader API errors."""
    pass


class AuthenticationError(ClientError):
    """Raised when authentication fails."""
    # No explicit __init__ needed: the previous one only forwarded a single
    # message to the base class, which Exception already does by default.


class ResourceNotFoundError(ClientError):
    """Raised when a resource is not found."""
@dataclass(frozen=True)
class AuthToken:
    """Authentication token returned by Client.login (TokenType is "GoogleLogin")."""
    TokenType: str
    AccessToken: str


@dataclass(frozen=True)
class UserInfo:
    """Account information returned by the user-info endpoint."""
    user_id: str
    user_name: str
    user_email: str
    user_profile_id: str


@dataclass(frozen=True)
class Tag:
    """A tag/category; `label` and `type` may be omitted by some servers."""
    id: str
    label: str | None = None
    type: str | None = None


@dataclass(frozen=True)
class Subscription:
    """A feed subscription and the tags (categories) attached to it."""
    id: str
    title: str
    url: str
    html_url: str
    icon_url: str
    categories: list[Tag]


@dataclass(frozen=True)
class ItemRef:
    """A bare reference to an item (its ID only)."""
    id: str


@dataclass(frozen=True)
class StreamIDs:
    """One page of item IDs; `continuation` is used to request the next page."""
    item_refs: list[ItemRef]
    continuation: str | None


@dataclass(frozen=True)
class ContentHREF:
    """A plain link (href only)."""
    href: str


@dataclass(frozen=True)
class ContentHREFType:
    """A link with its MIME type."""
    href: str
    type: str


@dataclass(frozen=True)
class ContentItemEnclosure:
    """An enclosure (attachment) of an item: URL plus MIME type."""
    url: str
    type: str


@dataclass(frozen=True)
class ContentItemContent:
    """A block of item content with its text direction."""
    direction: str
    content: str


@dataclass(frozen=True)
class ContentItemOrigin:
    """The feed an item came from."""
    stream_id: str
    title: str
    html_url: str


@dataclass(frozen=True)
class ContentItem:
    """The full content of a single item as returned by stream/items/contents."""
    id: str
    categories: list[str]
    title: str
    crawl_time_msec: str
    timestamp_usec: str
    published: int
    updated: int
    author: str
    alternate: list[ContentHREFType]
    summary: ContentItemContent
    content: ContentItemContent
    origin: ContentItemOrigin
    enclosure: list[ContentItemEnclosure]
    canonical: list[ContentHREF]


@dataclass(frozen=True)
class StreamContentItems:
    """A stream contents response: metadata plus the list of items.

    NOTE: the field named `self` mirrors the JSON key of the same name; it is
    a regular dataclass field, not the instance reference.
    """
    direction: str
    id: str
    title: str
    self: list[ContentHREF]
    updated: int
    items: list[ContentItem]
    author: str


@dataclass(frozen=True)
class QuickAddSubscription:
    """Result of the subscription/quickadd endpoint."""
    query: str
    num_results: int
    stream_id: str
    stream_name: str
class Client:
    """
    Client for interacting with a Google Reader compatible API.

    All request methods raise AuthenticationError on a 401 response and
    ClientError on any other non-200 response.
    """

    def __init__(
        self, base_url: str, session: requests.Session | None = None, user_agent: str = "Google Reader Python Client"
    ):
        """
        Initialize a new Google Reader API Client.

        Args:
            base_url: Base URL of the Google Reader compatible server
                (e.g., "https://reader.miniflux.app")
            session: Optional requests.Session object for making HTTP requests.
            user_agent: User agent string for the HTTP requests.
        """
        self._base_url = base_url.rstrip("/")
        self._session = session or requests.Session()
        self._session.headers.update({"User-Agent": user_agent})

    @staticmethod
    def _auth_headers(auth: AuthToken) -> dict[str, str]:
        """Build the Authorization header from an authentication token."""
        return {"Authorization": f"{auth.TokenType} auth={auth.AccessToken}"}

    @staticmethod
    def _check_response(
        response: requests.Response, error_message: str, not_found_message: str | None = None
    ) -> None:
        """
        Raise the appropriate exception for a failed HTTP response.

        Args:
            response: HTTP response to inspect.
            error_message: Message used for the generic ClientError.
            not_found_message: If given, a 404 status raises
                ResourceNotFoundError with this message.

        Raises:
            AuthenticationError: If the response status is 401.
            ResourceNotFoundError: If the status is 404 and not_found_message is given.
            ClientError: For any other non-200 status.
        """
        if response.status_code == 401:
            raise AuthenticationError("Authentication failed")
        if not_found_message is not None and response.status_code == 404:
            raise ResourceNotFoundError(not_found_message)
        if response.status_code != 200:
            raise ClientError(error_message)

    def login(self, username: str, password: str) -> AuthToken:
        """
        Log in to the Google Reader API.

        Args:
            username: Username for the Google Reader account.
            password: Password for the Google Reader account.

        Returns:
            AuthToken: Token to pass to every other API method.

        Raises:
            AuthenticationError: If the credentials are rejected or no token is returned.
        """
        response = self._session.post(
            f"{self._base_url}/accounts/ClientLogin", data={"Email": username, "Passwd": password}
        )
        if response.status_code != 200:
            raise AuthenticationError("Authentication failed")
        # The ClientLogin response body is a list of KEY=VALUE lines; the
        # "Auth" key holds the access token.
        auth_data = {}
        for line in response.text.strip().split("\n"):
            key, value = line.split("=", 1)
            auth_data[key] = value
        auth_token = auth_data.get("Auth")
        if not auth_token:
            raise AuthenticationError("No Auth token found in response")
        return AuthToken(TokenType="GoogleLogin", AccessToken=auth_token)

    def get_token(self, auth: AuthToken) -> str:
        """
        Get the CSRF token required by state-changing (POST) endpoints.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.

        Returns:
            str: CSRF token.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.get(
            f"{self._base_url}/reader/api/0/token",
            headers=self._auth_headers(auth),
        )
        self._check_response(response, "Failed to get token")
        return response.text.strip()

    def get_user_info(self, auth: AuthToken) -> UserInfo:
        """
        Get user information from the Google Reader API.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.

        Returns:
            UserInfo: User information object containing user ID, name, email, and profile ID.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.get(
            f"{self._base_url}/reader/api/0/user-info",
            headers=self._auth_headers(auth),
        )
        self._check_response(response, "Failed to get user info")
        user_info = response.json()
        return UserInfo(
            user_id=user_info.get("userId", ""),
            user_name=user_info.get("userName", ""),
            user_email=user_info.get("userEmail", ""),
            user_profile_id=user_info.get("userProfileId", ""),
        )

    def list_subscriptions(self, auth: AuthToken) -> list[Subscription]:
        """
        Get the list of subscriptions from the Google Reader API.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.

        Returns:
            List of Subscription objects.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.get(
            f"{self._base_url}/reader/api/0/subscription/list",
            headers=self._auth_headers(auth),
            params={"output": "json"},
        )
        self._check_response(response, "Failed to get subscriptions")
        return [
            Subscription(
                id=sub.get("id", ""),
                title=sub.get("title", ""),
                url=sub.get("url", ""),
                html_url=sub.get("htmlUrl", ""),
                icon_url=sub.get("iconUrl", ""),
                categories=[Tag(**cat) for cat in sub.get("categories", [])],
            )
            for sub in response.json().get("subscriptions", [])
        ]

    def edit_subscription(
        self,
        auth: AuthToken,
        csrf_token: str,
        subscription_id: str,
        action: Literal["edit", "subscribe", "unsubscribe"],
        remove_label_id: str | None = None,
        add_label_id: str | None = None,
        title: str | None = None,
    ) -> bool:
        """
        Edit a subscription.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            subscription_id(str): ID of the subscription to edit.
            action(str): Action to perform on the subscription (edit, subscribe, unsubscribe).
            remove_label_id(str): Label to remove from the subscription.
            add_label_id(str): Label to add to the subscription.
            title(str): New title for the subscription.

        Returns:
            bool: True if the operation was successful, False otherwise.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        data = {"s": subscription_id, "ac": action, "T": csrf_token}
        if remove_label_id:
            data["r"] = remove_label_id
        if add_label_id:
            data["a"] = add_label_id
        if title:
            data["t"] = title
        response = self._session.post(
            f"{self._base_url}/reader/api/0/subscription/edit",
            headers=self._auth_headers(auth),
            data=data,
        )
        self._check_response(response, "Failed to edit subscription")
        return True

    def quick_add_subscription(self, auth: AuthToken, csrf_token: str, url: str) -> QuickAddSubscription:
        """
        Quick add a subscription.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            url(str): URL of the subscription to add.

        Returns:
            QuickAddSubscription: Object containing the result of the quick add operation.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.post(
            f"{self._base_url}/reader/api/0/subscription/quickadd",
            headers=self._auth_headers(auth),
            params={"output": "json"},
            data={"quickadd": url, "T": csrf_token},
        )
        self._check_response(response, "Failed to quick add subscription")
        payload = response.json()
        return QuickAddSubscription(
            query=payload.get("query", ""),
            num_results=payload.get("numResults", 0),
            stream_id=payload.get("streamId", ""),
            stream_name=payload.get("streamName", ""),
        )

    def get_stream_items_ids(
        self,
        auth: AuthToken,
        stream_id: str,
        limit: int = 1000,
        direction: Literal["asc", "desc"] = "desc",
        start_time: int | None = None,
        continuation: str | None = None,
        exclude_target: Literal["user/-/state/com.google/read"] | None = None,
        include_target: Literal[
            "user/-/state/com.google/read", "user/-/state/com.google/starred", "user/-/state/com.google/like"
        ]
        | None = None,
    ) -> StreamIDs:
        """
        Get item IDs for a given stream.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            stream_id(str): ID of the stream to retrieve item IDs from.
            limit(int): Maximum number of items to retrieve.
            direction(Literal["asc", "desc"]): Direction to retrieve items (ascending or descending).
            start_time(int | None): Optional start time for retrieving items.
            continuation(str | None): Optional continuation token for pagination.
            exclude_target(str | None): Optional target to exclude from results.
            include_target(str | None): Optional target to include in results.

        Returns:
            StreamIDs: Page of item references plus a continuation token.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        params = {"output": "json", "s": stream_id, "n": limit}
        if direction == "asc":
            # The server default is newest-first; "r=o" requests oldest-first.
            params["r"] = "o"
        if start_time:
            params["ot"] = start_time
        if exclude_target:
            params["xt"] = exclude_target
        if include_target:
            params["it"] = include_target
        if continuation:
            params["c"] = continuation
        response = self._session.get(
            f"{self._base_url}/reader/api/0/stream/items/ids",
            headers=self._auth_headers(auth),
            params=params,
        )
        self._check_response(response, "Failed to get item IDs")
        data = response.json()
        return StreamIDs(
            item_refs=[ItemRef(id=item["id"]) for item in data.get("itemRefs", [])],
            continuation=data.get("continuation", ""),
        )

    @staticmethod
    def _parse_content_item(item: dict) -> ContentItem:
        """Build a ContentItem from one raw item dictionary of the JSON response."""
        summary = item.get("summary", {})
        content = item.get("content", {})
        origin = item.get("origin", {})
        return ContentItem(
            id=item.get("id", ""),
            categories=item.get("categories", []),
            title=item.get("title", ""),
            crawl_time_msec=item.get("crawlTimeMsec", ""),
            timestamp_usec=item.get("timestampUsec", ""),
            published=item.get("published", 0),
            updated=item.get("updated", 0),
            author=item.get("author", ""),
            alternate=[
                ContentHREFType(href=alt.get("href", ""), type=alt.get("type", ""))
                for alt in item.get("alternate", [])
            ],
            summary=ContentItemContent(
                direction=summary.get("direction", ""),
                content=summary.get("content", ""),
            ),
            content=ContentItemContent(
                direction=content.get("direction", ""),
                content=content.get("content", ""),
            ),
            origin=ContentItemOrigin(
                stream_id=origin.get("streamId", ""),
                title=origin.get("title", ""),
                html_url=origin.get("htmlUrl", ""),
            ),
            # NOTE(review): enclosure parsing was commented out in the original
            # code (reason unknown) — kept disabled to preserve behavior;
            # confirm server payloads before re-enabling.
            enclosure=[],
            canonical=[ContentHREF(**can) for can in item.get("canonical", [])],
        )

    def get_stream_items_contents(self, auth: AuthToken, csrf_token: str, item_ids: list[str]) -> StreamContentItems:
        """
        Get the contents of items.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            item_ids(list[str]): List of item IDs to retrieve.

        Returns:
            StreamContentItems: List of item contents.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.post(
            f"{self._base_url}/reader/api/0/stream/items/contents",
            headers=self._auth_headers(auth),
            params={"output": "json"},
            data={"i": item_ids, "T": csrf_token},
        )
        self._check_response(response, "Failed to get item contents")
        data = response.json()
        return StreamContentItems(
            direction=data.get("direction", ""),
            id=data.get("id", ""),
            title=data.get("title", ""),
            self=[ContentHREF(**item) for item in data.get("self", [])],
            updated=data.get("updated", 0),
            items=[self._parse_content_item(item) for item in data.get("items", [])],
            author=data.get("author", ""),
        )

    def edit_tags(
        self,
        auth: AuthToken,
        csrf_token: str,
        item_ids: list[str],
        add_tags: list[str] | None = None,
        remove_tags: list[str] | None = None,
    ) -> bool:
        """
        Edit tags for a list of items.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            item_ids(list[str]): List of item IDs to edit tags for.
            add_tags(list[str]): List of tags to add.
            remove_tags(list[str]): List of tags to remove.

        Returns:
            bool: True if the operation was successful, False otherwise.

        Raises:
            ClientError: If no tags are given, the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        data = {"i": item_ids, "T": csrf_token}
        if add_tags:
            data["a"] = add_tags
        if remove_tags:
            data["r"] = remove_tags
        if not add_tags and not remove_tags:
            raise ClientError("No tags to add or remove")
        response = self._session.post(
            f"{self._base_url}/reader/api/0/edit-tag",
            headers=self._auth_headers(auth),
            params={"output": "json"},
            data=data,
        )
        self._check_response(response, "Failed to edit tags")
        return True

    def disable_tag(self, auth: AuthToken, csrf_token: str, tag_id: str) -> bool:
        """
        Deletes a category or a tag.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            tag_id(str): ID of the tag to delete.

        Returns:
            bool: True if the operation was successful, False otherwise.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.post(
            f"{self._base_url}/reader/api/0/disable-tag",
            headers=self._auth_headers(auth),
            params={"output": "json"},
            data={"s": tag_id, "T": csrf_token},
        )
        self._check_response(response, "Failed to disable tags")
        return True

    def delete_tag(self, auth: AuthToken, csrf_token: str, tag_id: str) -> bool:
        """
        Deletes a category or a tag (alias for disable_tag).

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            tag_id(str): ID of the tag to delete.

        Returns:
            bool: True if the operation was successful, False otherwise.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        return self.disable_tag(auth, csrf_token, tag_id)

    def rename_tag(self, auth: AuthToken, csrf_token: str, tag_id: str, new_label_name: str) -> bool:
        """
        Rename a category or a tag.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            tag_id(str): ID of the tag to rename.
            new_label_name(str): New name for the category or tag.

        Returns:
            bool: True if the operation was successful, False otherwise.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.post(
            f"{self._base_url}/reader/api/0/rename-tag",
            headers=self._auth_headers(auth),
            params={"output": "json"},
            data={"s": tag_id, "dest": get_label_id(new_label_name), "T": csrf_token},
        )
        self._check_response(response, "Failed to rename tags")
        return True

    def list_tags(self, auth: AuthToken) -> list[Tag]:
        """
        Get the list of tags from the Google Reader API.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.

        Returns:
            List of Tag objects.

        Raises:
            ClientError: If the request fails or the response is not valid.
            AuthenticationError: If the authentication token is invalid.
        """
        response = self._session.get(
            f"{self._base_url}/reader/api/0/tag/list",
            headers=self._auth_headers(auth),
            params={"output": "json"},
        )
        self._check_response(response, "Failed to get tags")
        return [Tag(**tag) for tag in response.json().get("tags", [])]

    def mark_all_as_read(
        self, auth: AuthToken, csrf_token: str, stream_id: str, before_timestamp: int | None = None
    ) -> bool:
        """
        Mark all items in a stream as read.

        Args:
            auth(AuthToken): Authentication token obtained from the login process.
            csrf_token(str): CSRF token for the request.
            stream_id(str): ID of the stream to mark as read.
            before_timestamp(int | None): Optional timestamp to mark items as read before this time.

        Returns:
            bool: True if the operation was successful, False otherwise.

        Raises:
            ClientError: If the request fails or the response is not valid.
            ResourceNotFoundError: If the stream does not exist.
            AuthenticationError: If the authentication token is invalid.
        """
        data = {"s": stream_id, "T": csrf_token}
        if before_timestamp:
            data["ts"] = str(before_timestamp)
        response = self._session.post(
            f"{self._base_url}/reader/api/0/mark-all-as-read",
            headers=self._auth_headers(auth),
            data=data,
        )
        self._check_response(response, "Failed to mark all as read", not_found_message="Stream not found")
        return True
def get_long_item_id(item_id: int) -> str:
    """
    Convert a short item ID to a long item ID.

    Args:
        item_id(int): Short item ID.

    Returns:
        Long item ID ("tag:google.com,2005:reader/item/" + 16-digit zero-padded hex).
    """
    hex_id = format(item_id, "016x")
    return "tag:google.com,2005:reader/item/" + hex_id
def get_label_id(label_title: str) -> str:
    """
    Convert a label to a label ID.

    Args:
        label_title(str): Label name.

    Returns:
        Label stream ID (the STREAM_TAG template filled with the title).
    """
    stream_id = STREAM_TAG.format(label_title=label_title)
    return stream_id

240
main.py Normal file
View file

@ -0,0 +1,240 @@
import os
import re
import json
import google_reader
import tomllib
import sys
from datetime import datetime
from zoneinfo import ZoneInfo
from pathlib import Path
from hashlib import sha256
#%% Configuration
class Config:
    """
    Feather runtime configuration.

    Values are resolved in priority order: environment variable
    (CATEGORY_FIELD), then the user config file (config.toml by default,
    overridable via the CONFIG_PATH environment variable), then
    config.default.toml. Loading also creates the data/reader directories.
    """

    def __init__(self):
        with open("config.default.toml", "rb") as f:
            default_config = tomllib.load(f)
        config_path = os.environ.get("CONFIG_PATH") or "config.toml"
        with open(config_path, "rb") as f:
            config = tomllib.load(f)

        def get_config(category, field, can_default=True):
            # Resolution order: environment variable > user config > defaults.
            env_name = f"{category.upper()}_{field.upper()}"
            c = config.get(category, {})
            if env_name in os.environ:
                return os.environ[env_name]
            elif field in c:
                return c[field]
            elif can_default:
                return default_config[category][field]
            else:
                print(f"Error while loading configuration: {category}.{field} not found in {config_path} nor in environment variable {env_name}", file=sys.stderr)
                # sys.exit instead of the builtin exit(), which is meant for
                # interactive sessions and may be stripped in some setups.
                sys.exit(1)

        # Get config fields
        self.html_root: Path = Path(get_config("directories", "reader"))
        self.json_root: Path = Path(get_config("directories", "data"))
        self.server_url: str = get_config("server", "url", False)
        self.server_user: str = get_config("server", "user", False)
        self.server_password: str = get_config("server", "password", False)
        self.items_per_query: int = int(get_config("server", "items_per_request"))
        self.timezone: ZoneInfo = ZoneInfo(get_config("time", "timezone"))
        self.time_format: str = get_config("time", "format")
        # Computed config fields
        self.update_lock = self.json_root / "update.lock"
        # Create missing directories (parents=True so that nested configured
        # paths like "var/data" work as well).
        self.html_root.mkdir(parents=True, exist_ok=True)
        self.json_root.mkdir(parents=True, exist_ok=True)
#%% Interaction with server
# Extracts the label title from a tag stream id of the form "user/<id>/label/<title>".
label_name = re.compile("user/.*/label/(.*)")
class ClientSession:
    """
    Authenticated session wrapping google_reader.Client.

    Logs in and fetches the CSRF token on construction, then forwards the
    tokens automatically on every call.
    """
    client: google_reader.Client
    # This is the AuthToken dataclass returned by Client.login, not a plain
    # string (the previous `str` annotation was incorrect).
    auth_token: google_reader.AuthToken
    csrf_token: str

    def __init__(self, config: Config):
        self.client = google_reader.Client(config.server_url)
        self.auth_token = self.client.login(config.server_user, config.server_password)
        self.csrf_token = self.client.get_token(self.auth_token)

    def mark_as_read(self, item_ids):
        """Mark the given item ids as read on the server."""
        self.client.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ])

    def list_folders(self):
        """Return a list of (folder_name, folder_id) tuples for every folder tag."""
        folders = []
        for tag in self.client.list_tags(self.auth_token):
            if tag.type != "folder":
                continue
            # Some servers omit the label field; fall back to parsing the tag id.
            name = tag.label or label_name.search(tag.id).group(1)
            folders.append((name, tag.id))
        return folders

    def get_stream_items_ids(self, *args, **kwargs):
        """Forward to Client.get_stream_items_ids with the stored auth token."""
        return self.client.get_stream_items_ids(self.auth_token, *args, **kwargs)

    def get_stream_items_contents(self, *args, **kwargs):
        """Forward to Client.get_stream_items_contents with the stored tokens."""
        return self.client.get_stream_items_contents(self.auth_token, self.csrf_token, *args, **kwargs)
#%% Regular feather operations
def mark_deleted_as_read(config, client_session):
    """
    Mark items whose HTML file the user deleted as read on the server.

    Scans the JSON data directory; every stored item whose corresponding HTML
    file is missing is queued, its JSON record is deleted, and the queued ids
    are sent to the server in batches of config.items_per_query. Skipped
    entirely if the previous synchronization left its lock file behind.
    """
    if config.update_lock.exists():
        print("The previous synchronization was aborted, not marking any item as read in order to avoid collateral damage")
        return
    to_mark_as_read = []
    for stored_item in config.json_root.glob("*.json"):
        # Use a context manager so the file handle is closed deterministically.
        with stored_item.open("r") as f:
            item_json = json.load(f)
        html_path = config.html_root / item_json["html_path"]
        if not html_path.exists():
            to_mark_as_read.append(item_json["id"])
            # The HTML file is gone: drop the local JSON record too.
            stored_item.unlink()
    # Send ids in server-sized batches. The step was previously correct
    # (items_per_query) but the slice width was hardcoded to 500, producing
    # overlapping batches whenever items_per_query < 500.
    batch = config.items_per_query
    for i in range(0, len(to_mark_as_read), batch):
        client_session.mark_as_read(to_mark_as_read[i:i + batch])
    print(f"Marked {len(to_mark_as_read)} items as read")
def get_html_path(config, item_json):
    """
    Return the HTML file path for an item, creating its folder directory.

    The filename is "<published date>\t[<feed title>]\t<item title>.html" with
    "/" replaced by "-" (filesystem safety) and truncated past 200 characters.
    """
    folder_dir = config.html_root / item_json["folder"].replace("/", "-")
    folder_dir.mkdir(exist_ok=True)
    published = datetime.fromtimestamp(item_json["published"], config.timezone)
    stamp = published.strftime(config.time_format)
    name = f"{stamp}\t[{item_json['origin_title']}]\t{item_json['title']}.html".replace("/", "-")
    if len(name) > 200:
        name = name[:200] + '...html'
    return folder_dir / name
def synchronize_with_server(config, client_session):
    """
    Synchronize items from the server, generating and deleting JSON and HTML
    files accordingly.

    A lock file is created for the duration of the run and removed only on
    success, so mark_deleted_as_read can detect an aborted synchronization.
    Unread items are fetched folder by folder (paginated via continuation
    tokens); new items get a JSON record plus a generated HTML page, and
    items no longer returned by the server are deleted locally.
    """
    config.update_lock.touch()
    print("Synchronizing with server...")
    new_items = 0
    grabbed_item_paths = []
    folders = client_session.list_folders()
    for (folder_name, folder_id) in folders:
        print(f" Updating folder {folder_name}")
        def process(item_ids):
            # Fetch full contents for one batch of ids and store any new items.
            nonlocal new_items, grabbed_item_paths
            if len(item_ids) > 0:
                item_contents = client_session.get_stream_items_contents(item_ids=item_ids)
                for item_content in item_contents.items:
                    item_json = {
                        "id": item_content.id,
                        "folder": folder_name,
                        "title": item_content.title,
                        "published": item_content.published,
                        "updated": item_content.updated,
                        "author": item_content.author,
                        "summary": item_content.summary.content,
                        "content": item_content.content.content,
                        "origin_title": item_content.origin.title,
                        "origin_url": item_content.origin.html_url,
                        # assumes the server always returns at least one
                        # canonical URL per item — TODO confirm
                        "canonical_url": item_content.canonical[0].href,
                    }
                    item_json["html_path"] = str(get_html_path(config, item_json).relative_to(config.html_root))
                    # JSON records are named by the SHA-256 of the item id, so
                    # the filename is always filesystem-safe and collision-free.
                    p = config.json_root / f"{ sha256(item_json["id"].encode("utf-8")).hexdigest() }.json"
                    grabbed_item_paths.append(p)
                    if not p.exists():
                        # write JSON
                        with p.open("w") as f:
                            json.dump(item_json, f)
                        # write HTML
                        generate_html_for_item(config, item_json)
                        new_items += 1
        # Page through all unread items of this folder; an empty continuation
        # string marks the last page.
        continuation = None
        while continuation != '':
            items = client_session.get_stream_items_ids(stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=config.items_per_query, continuation=continuation)
            item_ids = [item.id for item in items.item_refs]
            process(item_ids)
            continuation = items.continuation
    # Remove items that we didn't get from the server but are in the JSON directory
    removed_items = 0
    for item_path in config.json_root.glob("*.json"):
        if not item_path in grabbed_item_paths:
            # remove HTML
            item_json = json.load(item_path.open("r"))
            remove_html_for_item(config, item_json)
            # remove JSON
            item_path.unlink()
            removed_items += 1
    print(f"Synchronization successful ({new_items} new items, {removed_items} removed)")
    # Only unlink the lock on full success: an exception above leaves it in
    # place so the next run knows the sync was aborted.
    config.update_lock.unlink()
def generate_html_for_item(config, item_json):
    """
    Write the standalone HTML page for a stored item (dict as built during
    synchronization). If the target file already exists, a warning is printed
    and nothing is written.
    """
    datetime_published = datetime.fromtimestamp(item_json["published"], config.timezone).strftime(config.time_format)
    html_path = config.html_root / item_json["html_path"]
    if html_path.exists():
        print(f"WARNING: a file already exist for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong.")
    else:
        # NOTE(review): summary/content come straight from the server and are
        # embedded as raw HTML on purpose (they are feed-provided markup).
        with html_path.open("w") as f:
            f.write(f"""
<!doctype html>
<html lang="en-US">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width" />
<title>{item_json["title"]}</title>
</head>
<body style="background-color:black; color:white;">
<style>a{{color:palevioletred; text-decoration:none;}}</style>
<article style="max-width:60rem; margin:auto;">
<p style="display:flex; flex-direction:row; justify-content:space-between;">
<span>{datetime_published}</span>
<span><a href="{item_json["origin_url"]}">{item_json["origin_title"]}</a></span>
</p>
<h1><a href="{item_json["canonical_url"]}">{item_json["title"]}</a></h1>
<h3>{item_json["author"]}</h3>
<div>{item_json["summary"]}</div>
<div>{item_json["content"]}</div>
</article>
</body>
</html>
""")
def remove_html_for_item(config, item_json):
    """Delete the HTML file associated with a stored item's JSON record."""
    (config.html_root / item_json["html_path"]).unlink()
def remove_empty_html_directories(config):
# Remove empty directories in the HTML directory
html_root = config.html_root
for (dirpath, dirnames, filenames) in html_root.walk(top_down=False):
if dirpath != html_root:
if len(dirnames) == 0 and len(filenames) == 0:
dirpath.rmdir()
def process(config, client_session):
    """
    Do a full feather update: propagate local HTML deletions as read marks,
    synchronize items with the server, then clean up empty HTML directories.
    """
    mark_deleted_as_read(config, client_session)
    synchronize_with_server(config, client_session)
    remove_empty_html_directories(config)
#%% Run feather
def main():
    """Entry point: load configuration, open an authenticated session, update."""
    config = Config()
    client_session = ClientSession(config)
    process(config, client_session)

if __name__ == "__main__":
    main()

9
pyproject.toml Normal file
View file

@ -0,0 +1,9 @@
[project]
name = "feather"
version = "0.1.0"
description = "Feather: a file-based RSS reader syncing with Google Reader compatible servers"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"requests>=2.32.5",
]

98
uv.lock generated Normal file
View file

@ -0,0 +1,98 @@
version = 1
revision = 3
requires-python = ">=3.12"
[[package]]
name = "certifi"
version = "2025.10.5"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519, upload-time = "2025-10-05T04:12:15.808Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 205655, upload-time = "2025-08-09T07:56:08.475Z" },
{ url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223, upload-time = "2025-08-09T07:56:09.708Z" },
{ url = "https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366, upload-time = "2025-08-09T07:56:11.326Z" },
{ url = "https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104, upload-time = "2025-08-09T07:56:13.014Z" },
{ url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830, upload-time = "2025-08-09T07:56:14.428Z" },
{ url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854, upload-time = "2025-08-09T07:56:16.051Z" },
{ url = "https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670, upload-time = "2025-08-09T07:56:17.314Z" },
{ url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501, upload-time = "2025-08-09T07:56:18.641Z" },
{ url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173, upload-time = "2025-08-09T07:56:20.289Z" },
{ url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822, upload-time = "2025-08-09T07:56:21.551Z" },
{ url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543, upload-time = "2025-08-09T07:56:23.115Z" },
{ url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" },
{ url = "https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" },
{ url = "https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" },
{ url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" },
{ url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" },
{ url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" },
{ url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" },
{ url = "https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" },
{ url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" },
{ url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" },
{ url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" },
{ url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" },
{ url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" },
{ url = "https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" },
{ url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" },
{ url = "https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" },
{ url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" },
{ url = "https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" },
{ url = "https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" },
{ url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" },
{ url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" },
{ url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" },
{ url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" },
]
[[package]]
name = "feather"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "requests" },
]
[package.metadata]
requires-dist = [{ name = "requests", specifier = ">=2.32.5" }]
[[package]]
name = "idna"
version = "3.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "urllib3"
version = "2.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
]