import os
import re
import json
import google_reader
import tomllib
import sys
from datetime import datetime
from zoneinfo import ZoneInfo
from pathlib import Path
from hashlib import sha256

#%% Configuration

class Config:
    def __init__(self):
        with open("config.default.toml", "rb") as f:
            default_config = tomllib.load(f)

        config_path = os.environ.get("CONFIG_PATH") or "config.toml"
        with open(config_path, "rb") as f:
            config = tomllib.load(f)
        
        def get_config(category, field, can_default=True):
            env_name = f"{category.upper()}_{field.upper()}"
            c = config.get(category, {})
            if env_name in os.environ:
                return os.environ[env_name]
            elif field in c:
                return c[field]
            elif can_default:
                return default_config[category][field]
            else:
                print(f"Error while loading configuration: {category}.{field} not found in {config_path} nor in environment variable {env_name}", file=sys.stderr)
                exit(1)

        # Get config fields
        self.html_root: Path = Path(get_config("directories", "reader"))
        self.json_root: Path = Path(get_config("directories", "data"))
        self.server_url: str = get_config("server", "url", False)
        self.server_user: str = get_config("server", "user", False)
        self.server_password: str = get_config("server", "password", False)
        self.items_per_query: int = int(get_config("server", "items_per_request"))
        self.timezone: ZoneInfo = ZoneInfo(get_config("time", "timezone"))
        self.time_format: str = get_config("time", "format")

        # Computed config fields
        self.update_lock = self.json_root / "update.lock"

        # Create missing directories
        self.html_root.mkdir(exist_ok=True)
        self.json_root.mkdir(exist_ok=True)

#%% Interaction with server

label_name = re.compile("user/.*/label/(.*)")
class ClientSession:
    client: google_reader.Client
    auth_token: str
    csrf_token: str

    def __init__(self, config: Config):
        self.client = google_reader.Client(config.server_url)
        self.auth_token = self.client.login(config.server_user, config.server_password)
        self.csrf_token = self.client.get_token(self.auth_token)
    
    def mark_as_read(self, item_ids):
        self.client.edit_tags(self.auth_token, self.csrf_token, item_ids=item_ids, add_tags=[google_reader.STREAM_READ])
    
    def list_folders(self):
        folders = [tag for tag in self.client.list_tags(self.auth_token) if tag.type == "folder"]
        l = []
        for folder in folders:
            folder_name = folder.label or label_name.search(folder.id).group(1)
            folder_id = folder.id
            l.append((folder_name, folder_id))
        return l
    
    def get_stream_items_ids(self, *args, **kwargs):
        return self.client.get_stream_items_ids(self.auth_token, *args, **kwargs)
    
    def get_stream_items_contents(self, *args, **kwargs):
        return self.client.get_stream_items_contents(self.auth_token, self.csrf_token, *args, **kwargs)

#%% Regular feather operations

def mark_deleted_as_read(config, client_session):
    # Mark items that are in the JSON directory but with missing HTML file as read on the server
    if config.update_lock.exists():
        print("The previous synchronization was aborted, not marking any item as read in order to avoid collateral damage")
        return

    marked_as_read = 0
    to_mark_as_read = []
    for stored_item in config.json_root.glob("*.json"):
        item_json = json.load(stored_item.open("r"))
        html_path = config.html_root / item_json["html_path"]
        if not html_path.exists():
            to_mark_as_read.append(item_json["id"])
            # delete JSON file
            stored_item.unlink()
            marked_as_read += 1

    for i in range(0, len(to_mark_as_read), config.items_per_query):
        client_session.mark_as_read(to_mark_as_read[i:i+500])

    print(f"Marked {marked_as_read} items as read")

def get_html_path(config, item_json):
    html_directory = config.html_root / item_json["folder"].replace("/", "-")
    html_directory.mkdir(exist_ok=True)
    datetime_published = datetime.fromtimestamp(item_json["published"], config.timezone).strftime(config.time_format)
    html_name = f"{datetime_published}\t[{item_json["origin_title"]}]\t{item_json["title"]}.html".replace("/", "-")
    html_name = html_name[:200] + '...html' if len(html_name) > 200 else html_name
    html_path = html_directory / html_name
    return html_path

def synchronize_with_server(config, client_session):
    # Synchronize items from the server, generating and deleting JSON and HTML files accordingly
    config.update_lock.touch()
    print("Synchronizing with server...")

    new_items = 0
    grabbed_item_paths = []

    folders = client_session.list_folders()
    for (folder_name, folder_id) in folders:
        print(f"  Updating folder {folder_name}")

        def process(item_ids):
            nonlocal new_items, grabbed_item_paths
            if len(item_ids) > 0:
                item_contents = client_session.get_stream_items_contents(item_ids=item_ids)
                for item_content in item_contents.items:
                    item_json = {
                        "id": item_content.id,
                        "folder": folder_name,
                        "title": item_content.title,
                        "published": item_content.published,
                        "updated": item_content.updated,
                        "author": item_content.author,
                        "summary": item_content.summary.content,
                        "content": item_content.content.content,
                        "origin_title": item_content.origin.title,
                        "origin_url": item_content.origin.html_url,
                        "canonical_url": item_content.canonical[0].href,
                    }
                    item_json["html_path"] = str(get_html_path(config, item_json).relative_to(config.html_root))

                    p = config.json_root / f"{ sha256(item_json["id"].encode("utf-8")).hexdigest() }.json"
                    grabbed_item_paths.append(p)
                    if not p.exists():
                        # write JSON
                        with p.open("w") as f:
                            json.dump(item_json, f)
                        # write HTML
                        generate_html_for_item(config, item_json)
                        new_items += 1

        continuation = None
        while continuation != '':
            items = client_session.get_stream_items_ids(stream_id=folder_id, exclude_target="user/-/state/com.google/read", limit=config.items_per_query, continuation=continuation)
            item_ids = [item.id for item in items.item_refs]
            process(item_ids)
            continuation = items.continuation
    
    # Remove items that we didn't get from the server but are in the JSON directory
    removed_items = 0
    for item_path in config.json_root.glob("*.json"):
        if not item_path in grabbed_item_paths:
            # remove HTML
            item_json = json.load(item_path.open("r"))
            remove_html_for_item(config, item_json)
            # remove JSON
            item_path.unlink()
            removed_items += 1
    
    print(f"Synchronization successful ({new_items} new items, {removed_items} removed)")
    config.update_lock.unlink()

def generate_html_for_item(config, item_json):
    # Write HTML file for a JSON object
    datetime_published = datetime.fromtimestamp(item_json["published"], config.timezone).strftime(config.time_format)
    html_path = config.html_root / item_json["html_path"]
    if html_path.exists():
        print(f"WARNING: a file already exist for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong.")
    else:
        with html_path.open("w") as f:
            f.write(f"""
    <!doctype html>
    <html lang="en-US">
    <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width" />
    <title>{item_json["title"]}</title>
    </head>
    <body style="background-color:black; color:white;">
    <style>a{{color:palevioletred; text-decoration:none;}}</style>
    <article style="max-width:60rem; margin:auto;">
        <p style="display:flex; flex-direction:row; justify-content:space-between;">
            <span>{datetime_published}</span>
            <span><a href="{item_json["origin_url"]}">{item_json["origin_title"]}</a></span>
        </p>
        <h1><a href="{item_json["canonical_url"]}">{item_json["title"]}</a></h1>
        <h3>{item_json["author"]}</h3>
        <div>{item_json["summary"]}</div>
        <div>{item_json["content"]}</div>
    </article>
    </body>
    </html>
    """)

def remove_html_for_item(config, item_json):
    # Delete a HTML file for a JSON object
    html_path = config.html_root / item_json["html_path"]
    html_path.unlink()

def remove_empty_html_directories(config):
    # Remove empty directories in the HTML directory
    html_root = config.html_root
    for (dirpath, dirnames, filenames) in html_root.walk(top_down=False):
        if dirpath != html_root:
            if len(dirnames) == 0 and len(filenames) == 0:
                dirpath.rmdir()

def process(config, client_session):
    # Do a full feather update
    mark_deleted_as_read(config, client_session)
    synchronize_with_server(config, client_session)
    remove_empty_html_directories(config)

#%% Run feather

def main():
    config = Config()
    client_session = ClientSession(config)
    process(config, client_session)

if __name__ == "__main__":
    main()