diff --git a/README.md b/README.md index 9ce9bde..ebb8621 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ - [ ] Nested categories - [ ] Share the fun somewhere - [x] Edge cases: mark as read during sync (if marked as read on server or not) -- [ ] Proper filename escaping +- [x] Proper filename escaping - [ ] Command to force regenerate all HTML files (incl. recompute datetimes & paths) - [ ] Handle item updates diff --git a/config.default.toml b/config.default.toml index 366bc9c..49f385f 100644 --- a/config.default.toml +++ b/config.default.toml @@ -44,6 +44,8 @@ template = ''' filename_template = "{{ published_formatted }}\t[{{ origin_title }}]\t{{ title }}.html" # Maximum allowed filename length (in bytes assuming UTF-8 encoding) before truncating. Depending on your filesystem filename's limits it may be possible to increase the value, ask Wikipedia for details. max_filename_length = 250 +# Table mapping characters to what they will be replaced with in filenames. Useful to remove/replace characters that are not allowed in filenames by your filesystem. The default should be fine for most Unix filesystems. +filename_replacement = { "/" = "⧸", "\u0000" = "" } [time] # Which timezone to use when writing date and time. 
diff --git a/main.py b/main.py index ce03824..c9ce42a 100644 --- a/main.py +++ b/main.py @@ -46,6 +46,7 @@ class Config: self.item_template: Template = Template(get_config("html", "template"), autoescape=True) self.item_filename_template: Template = Template(get_config("html", "filename_template"), autoescape=False) self.max_filename_length: int = int(get_config("html", "max_filename_length")) + self.filename_translation = str.maketrans(get_config("html", "filename_replacement")) # Computed config fields self.update_lock = self.json_root / "update.lock" @@ -109,8 +110,8 @@ def mark_deleted_as_read(config, client_session): print(f"Marked {marked_as_read} items as read") -def escape_filename(filename): - return filename.replace("/", "-") +def escape_filename(config, filename): + return filename.translate(config.filename_translation) def truncate_filename(config, filename): max_filename_length = config.max_filename_length @@ -124,10 +125,10 @@ def truncate_filename(config, filename): return filename[:cutoff] + '…' + suffix def get_html_path(config, item_json): - folder_directory = config.html_root / escape_filename(item_json["folder"]) + folder_directory = config.html_root / escape_filename(config, item_json["folder"]) folder_directory.mkdir(exist_ok=True) - html_name = truncate_filename(config, escape_filename(config.item_filename_template.render(item_json))) + html_name = truncate_filename(config, escape_filename(config, config.item_filename_template.render(item_json))) return folder_directory / html_name