From cee00eb13d6c931f44d0057c8aac7076138811fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?=
Date: Fri, 19 Jan 2024 14:37:06 +0100
Subject: [PATCH] [internal] normalize newlines and bom

---
 anselme/parser/init.lua | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/anselme/parser/init.lua b/anselme/parser/init.lua
index ce2694b..b9f7413 100644
--- a/anselme/parser/init.lua
+++ b/anselme/parser/init.lua
@@ -9,8 +9,16 @@ local function expect_end(exp, rem)
 	return exp
 end
 
+-- we require UTF-8 but life is full of disappointments: strip a leading BOM and
+-- turn \r\n and lone \r into \n; returns only the normalized string (gsub also
+-- returns a match count, which must not leak into block() as an extra argument)
+local function normalize_encoding(str)
+	local normalized = str:gsub("^"..string.char(0xEF, 0xBB, 0xBF), ""):gsub("\r\n?", "\n")
+	return normalized
+end
+
 -- parse code (string) with the associated source (Source)
 -- the returned AST tree is stateless and can be stored/evaluated/etc as you please
 return function(code, source)
-	return expect_end(block(Source:new(source, 1, 1), Options:new(), code))
+	return expect_end(block(Source:new(source, 1, 1), Options:new(), normalize_encoding(code)))
 end