mirror of
https://codeberg.org/Reuh/feather.git
synced 2025-10-27 10:09:32 +00:00
feat: make sure on-disk data is always in a consistent state during writes
This commit is contained in:
parent
66e7aa0eb4
commit
59e7530784
5 changed files with 69 additions and 58 deletions
|
|
@ -70,7 +70,6 @@ before publishing:
|
||||||
- [ ] Write documentation
|
- [ ] Write documentation
|
||||||
- [ ] Share the fun somewhere
|
- [ ] Share the fun somewhere
|
||||||
- [ ] Test with FreshRSS
|
- [ ] Test with FreshRSS
|
||||||
- [ ] Check locks & errors while writing to db (if JSON file corrupted)
|
|
||||||
- [ ] Feed & category order for googlereader ??
|
- [ ] Feed & category order for googlereader ??
|
||||||
|
|
||||||
"i'll look into it later":
|
"i'll look into it later":
|
||||||
|
|
|
||||||
|
|
@ -128,7 +128,7 @@ class GReaderArticle(Article):
|
||||||
self.feed_url = item_content.origin.html_url
|
self.feed_url = item_content.origin.html_url
|
||||||
self.article_url = item_content.canonical[0].href
|
self.article_url = item_content.canonical[0].href
|
||||||
|
|
||||||
self.compute_json_path()
|
self._compute_json_path()
|
||||||
|
|
||||||
|
|
||||||
class TTRSession(ClientSession):
|
class TTRSession(ClientSession):
|
||||||
|
|
@ -227,4 +227,4 @@ class TTRArticle(Article):
|
||||||
self.language = article.lang
|
self.language = article.lang
|
||||||
self.image_url = article.flavor_image
|
self.image_url = article.flavor_image
|
||||||
|
|
||||||
self.compute_json_path()
|
self._compute_json_path()
|
||||||
|
|
|
||||||
|
|
@ -85,9 +85,6 @@ class Config:
|
||||||
self.daemon_sync_up_every: int = int(get_config("daemon", "sync_up_every"))
|
self.daemon_sync_up_every: int = int(get_config("daemon", "sync_up_every"))
|
||||||
self.daemon_sync_down_every: int = int(get_config("daemon", "sync_down_every"))
|
self.daemon_sync_down_every: int = int(get_config("daemon", "sync_down_every"))
|
||||||
|
|
||||||
# Computed config fields
|
|
||||||
self.update_lock: Path = self.json_root / "update.lock"
|
|
||||||
|
|
||||||
# Create missing directories
|
# Create missing directories
|
||||||
self.html_root.mkdir(exist_ok=True)
|
self.html_root.mkdir(exist_ok=True)
|
||||||
self.json_root.mkdir(exist_ok=True)
|
self.json_root.mkdir(exist_ok=True)
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from abc import ABC
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
|
|
||||||
from feather.config import Config
|
from feather.config import Config
|
||||||
|
|
||||||
|
|
@ -17,7 +18,7 @@ def sanitize_filename(
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Escape invalid caracters and truncate the filename as per the configuration.
|
"""Escape invalid caracters and truncate the filename as per the configuration.
|
||||||
This operates on a single filename, not a path.
|
This operates on a single filename, not a path.
|
||||||
(insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)"""
|
(insert_before_suffix will be inserted between the stem and suffix, and is assumed to not need escaping)."""
|
||||||
filename = filename.translate(config.filename_translation)
|
filename = filename.translate(config.filename_translation)
|
||||||
|
|
||||||
max_filename_length = config.max_filename_length
|
max_filename_length = config.max_filename_length
|
||||||
|
|
@ -38,12 +39,24 @@ def sanitize_filename(
|
||||||
|
|
||||||
|
|
||||||
def format_datetime(config: Config, timestamp: int) -> str:
|
def format_datetime(config: Config, timestamp: int) -> str:
|
||||||
"""Format a timestamp according to the configuraiton"""
|
"""Format a timestamp according to the configuraiton."""
|
||||||
return datetime.fromtimestamp(timestamp, config.timezone).strftime(
|
return datetime.fromtimestamp(timestamp, config.timezone).strftime(
|
||||||
config.time_format
|
config.time_format
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def atomic_write(path: Path, content: str):
|
||||||
|
"""Write data to a file atomically."""
|
||||||
|
try:
|
||||||
|
with NamedTemporaryFile(dir=path.parent, delete=False) as f:
|
||||||
|
path_temp = Path(f.name)
|
||||||
|
f.write(content.encode("utf-8"))
|
||||||
|
path_temp.rename(path)
|
||||||
|
except:
|
||||||
|
path_temp.unlink(missing_ok=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
type CategoryId = int | str
|
type CategoryId = int | str
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -100,6 +113,9 @@ class Article(ABC):
|
||||||
language: str = "" # article language
|
language: str = "" # article language
|
||||||
image_url: str = "" # article main image
|
image_url: str = "" # article main image
|
||||||
|
|
||||||
|
def _hash_id(self):
|
||||||
|
return sha256(str(self.id).encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
def _get_html_path(self) -> Path:
|
def _get_html_path(self) -> Path:
|
||||||
config = self.config
|
config = self.config
|
||||||
|
|
||||||
|
|
@ -121,7 +137,7 @@ class Article(ABC):
|
||||||
html_path = category_directory / html_name
|
html_path = category_directory / html_name
|
||||||
# Add hash if filename already exists
|
# Add hash if filename already exists
|
||||||
if html_path.exists():
|
if html_path.exists():
|
||||||
id_hash = sha256(str(self.id).encode("utf-8")).hexdigest()[:8]
|
id_hash = self._hash_id()[:8]
|
||||||
html_path = html_path.parent / sanitize_filename(
|
html_path = html_path.parent / sanitize_filename(
|
||||||
config, html_path.name, insert_before_suffix=f".{id_hash}"
|
config, html_path.name, insert_before_suffix=f".{id_hash}"
|
||||||
)
|
)
|
||||||
|
|
@ -152,16 +168,13 @@ class Article(ABC):
|
||||||
d["category"] = self.category.asdict()
|
d["category"] = self.category.asdict()
|
||||||
return d
|
return d
|
||||||
|
|
||||||
def compute_json_path(self):
|
def _compute_json_path(self):
|
||||||
self.json_path = (
|
self.json_path = self.config.json_root / f"{self._hash_id()}.json"
|
||||||
self.config.json_root
|
|
||||||
/ f"{sha256(str(self.id).encode('utf-8')).hexdigest()}.json"
|
|
||||||
)
|
|
||||||
|
|
||||||
def write_json(self, recompute_path=False):
|
def _write_json(self, recompute_path=False):
|
||||||
"""Write the JSON file associated with this article. Error if it already exists."""
|
"""Write the JSON file associated with this article. Error if it already exists."""
|
||||||
if recompute_path:
|
if recompute_path:
|
||||||
self.compute_json_path()
|
self._compute_json_path()
|
||||||
stored_fields = (
|
stored_fields = (
|
||||||
"id",
|
"id",
|
||||||
"unread",
|
"unread",
|
||||||
|
|
@ -184,25 +197,16 @@ class Article(ABC):
|
||||||
article_json = {field: getattr(self, field) for field in stored_fields}
|
article_json = {field: getattr(self, field) for field in stored_fields}
|
||||||
article_json["category"] = self.category.asdict()
|
article_json["category"] = self.category.asdict()
|
||||||
if self.json_path.exists():
|
if self.json_path.exists():
|
||||||
raise Exception(
|
raise FileExistsError(
|
||||||
f"Unexpectedly tried to overwrite article data for {self.json_path}"
|
f"Unexpectedly tried to overwrite article data for {self.json_path}: either the feed has duplicate entries, or another feather process tried to write the same file at the same time (in which case, try again later)"
|
||||||
)
|
)
|
||||||
with self.json_path.open("w") as f:
|
atomic_write(self.json_path, json.dumps(article_json))
|
||||||
json.dump(article_json, f)
|
|
||||||
|
|
||||||
def delete_json(self):
|
def _delete_json(self, missing_ok=False):
|
||||||
"""Delete the JSON file associated with this article."""
|
"""Delete the JSON file associated with this article."""
|
||||||
self.json_path.unlink()
|
self.json_path.unlink(missing_ok=missing_ok)
|
||||||
|
|
||||||
def has_html(self) -> bool:
|
def _write_html(self, recompute_path=False):
|
||||||
"""Check if the HTML file associated with the article exists on disk."""
|
|
||||||
if self.html_path is None:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
html_path = self.config.html_root / self.html_path
|
|
||||||
return html_path.exists()
|
|
||||||
|
|
||||||
def write_html(self, recompute_path=False):
|
|
||||||
"""Write the HTML file associated with this article. Error if it already exists.
|
"""Write the HTML file associated with this article. Error if it already exists.
|
||||||
Note: this may recompute html_path, which is saved into the JSON - so make sure to save the JSON file _after_ the HTML file."""
|
Note: this may recompute html_path, which is saved into the JSON - so make sure to save the JSON file _after_ the HTML file."""
|
||||||
# Write HTML file for a JSON object
|
# Write HTML file for a JSON object
|
||||||
|
|
@ -212,43 +216,62 @@ class Article(ABC):
|
||||||
self.html_path = str(self._get_html_path())
|
self.html_path = str(self._get_html_path())
|
||||||
|
|
||||||
html_path = config.html_root / self.html_path
|
html_path = config.html_root / self.html_path
|
||||||
|
html_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
if html_path.exists():
|
if html_path.exists():
|
||||||
raise Exception(
|
raise FileExistsError(
|
||||||
f"Unexpectedly tried to overwrite article file for {html_path}. Either the feed has duplicate entries, or something has gone terribly wrong."
|
f"Unexpectedly tried to overwrite article file for {html_path}: either the feed has duplicate entries, or another feather process tried to write the same file at the same time (in which case, try again later)"
|
||||||
)
|
)
|
||||||
else:
|
atomic_write(
|
||||||
html_path.parent.mkdir(parents=True, exist_ok=True)
|
html_path, config.article_template.render(self._get_template_dict())
|
||||||
with html_path.open("w") as f:
|
)
|
||||||
f.write(config.article_template.render(self._get_template_dict()))
|
# set accessed date to update time, modified to publication time
|
||||||
# set accessed date to update time, modified to publication time
|
os.utime(html_path, (max(self.updated, self.updated), self.published))
|
||||||
os.utime(html_path, (max(self.updated, self.updated), self.published))
|
|
||||||
|
|
||||||
def delete_html(self, ignore_deleted=False):
|
def _delete_html(self, missing_ok=False):
|
||||||
"""Delete the HTML file associated with this article."""
|
"""Delete the HTML file associated with this article."""
|
||||||
# Delete a HTML file for a JSON object
|
# Delete a HTML file for a JSON object
|
||||||
html_path = self.config.html_root / self.html_path
|
html_path = self.config.html_root / self.html_path
|
||||||
if not ignore_deleted or html_path.exists():
|
html_path.unlink(missing_ok=missing_ok)
|
||||||
html_path.unlink()
|
|
||||||
|
def has_html(self) -> bool:
|
||||||
|
"""Check if the HTML file associated with the article exists on disk."""
|
||||||
|
if self.html_path is None:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
html_path = self.config.html_root / self.html_path
|
||||||
|
return html_path.exists()
|
||||||
|
|
||||||
|
def was_updated(self, old_article: Article) -> bool:
|
||||||
|
"""Returns true if the article is different from a previous version in a way that would require regeneration"""
|
||||||
|
return old_article._get_template_dict() != self._get_template_dict()
|
||||||
|
|
||||||
def write(self, recompute_paths=False):
|
def write(self, recompute_paths=False):
|
||||||
"""Write all the files associated with this article to disk."""
|
"""Write all the files associated with this article to disk."""
|
||||||
self.write_html(recompute_path=recompute_paths)
|
try:
|
||||||
self.write_json(recompute_path=recompute_paths)
|
self._write_html(recompute_path=recompute_paths)
|
||||||
|
except FileExistsError:
|
||||||
|
raise
|
||||||
|
except:
|
||||||
|
self._delete_html(missing_ok=True)
|
||||||
|
raise
|
||||||
|
try:
|
||||||
|
self._write_json(recompute_path=recompute_paths)
|
||||||
|
except FileExistsError:
|
||||||
|
raise
|
||||||
|
except:
|
||||||
|
self._delete_json(missing_ok=True)
|
||||||
|
raise
|
||||||
|
|
||||||
def delete(self):
|
def delete(self):
|
||||||
"""Delete all the files associated with this article."""
|
"""Delete all the files associated with this article."""
|
||||||
self.delete_html(ignore_deleted=True)
|
self._delete_html(missing_ok=True)
|
||||||
self.delete_json()
|
self._delete_json()
|
||||||
|
|
||||||
def regenerate(self):
|
def regenerate(self):
|
||||||
"""Delete and rewrite all the files associated with this article using to the latest configuration."""
|
"""Delete and rewrite all the files associated with this article using to the latest configuration."""
|
||||||
self.delete() # paths might change so we preemptively remove the old file
|
self.delete() # paths might change so we preemptively remove the old file
|
||||||
self.write(recompute_paths=True) # rewrite JSON & HTML
|
self.write(recompute_paths=True) # rewrite JSON & HTML
|
||||||
|
|
||||||
def was_updated(self, old_article: Article) -> bool:
|
|
||||||
"""Returns true if the article is different from a previous version in a way that would require regeneration"""
|
|
||||||
return old_article._get_template_dict() != self._get_template_dict()
|
|
||||||
|
|
||||||
|
|
||||||
class FileArticle(Article):
|
class FileArticle(Article):
|
||||||
def __init__(self, config: Config, json_path: Path) -> Article:
|
def __init__(self, config: Config, json_path: Path) -> Article:
|
||||||
|
|
|
||||||
|
|
@ -59,12 +59,6 @@ class FeatherApp:
|
||||||
config = self.config
|
config = self.config
|
||||||
client_session = self.get_client_session()
|
client_session = self.get_client_session()
|
||||||
|
|
||||||
if config.update_lock.exists():
|
|
||||||
print(
|
|
||||||
"The previous synchronization was aborted, not marking any article as read/unread in order to avoid collateral damage"
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
# gather articles to mark as read/unread
|
# gather articles to mark as read/unread
|
||||||
marked_as_read, marked_as_unread = 0, 0
|
marked_as_read, marked_as_unread = 0, 0
|
||||||
to_mark_as_read = []
|
to_mark_as_read = []
|
||||||
|
|
@ -108,7 +102,6 @@ class FeatherApp:
|
||||||
config = self.config
|
config = self.config
|
||||||
client_session = self.get_client_session()
|
client_session = self.get_client_session()
|
||||||
|
|
||||||
config.update_lock.touch()
|
|
||||||
print("Synchronizing with server...")
|
print("Synchronizing with server...")
|
||||||
|
|
||||||
new_articles, updated_articles = 0, 0
|
new_articles, updated_articles = 0, 0
|
||||||
|
|
@ -154,7 +147,6 @@ class FeatherApp:
|
||||||
print(
|
print(
|
||||||
f"Synchronization successful ({new_articles} new articles, {updated_articles} updated, {removed_articles} removed)"
|
f"Synchronization successful ({new_articles} new articles, {updated_articles} updated, {removed_articles} removed)"
|
||||||
)
|
)
|
||||||
config.update_lock.unlink()
|
|
||||||
|
|
||||||
def synchronize(self):
|
def synchronize(self):
|
||||||
"""Do a full feather update"""
|
"""Do a full feather update"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue