diff --git a/anselme/common/init.lua b/anselme/common/init.lua index 9e0cf6a..f40fcdf 100644 --- a/anselme/common/init.lua +++ b/anselme/common/init.lua @@ -13,9 +13,11 @@ local common = { trim = function(str) return str:match("^%s*(.-)%s*$") end, + -- format ansi colored string fmt = function(str, ...) return ansicolors(str):format(...) end, + -- generate a uuidv4 uuid = function() return ("xxxxxxxx-xxxx-4xxx-Nxxx-xxxxxxxxxxxx") -- version 4 :gsub("N", math.random(0x8, 0xb)) -- variant 1 diff --git a/anselme/parser/Options.lua b/anselme/parser/Options.lua new file mode 100644 index 0000000..486325e --- /dev/null +++ b/anselme/parser/Options.lua @@ -0,0 +1,23 @@ +local class = require("anselme.lib.class") + +local options = { "limit_pattern", "allow_newlines" } + +local Options +Options = class { + limit_pattern = nil, + allow_newlines = false, + + with = function(self, t) + local r = Options:new() + for _, opt in ipairs(options) do + if t[opt] ~= nil then + r[opt] = t[opt] + else + r[opt] = self[opt] + end + end + return r + end +} + +return Options diff --git a/anselme/parser/Source.lua b/anselme/parser/Source.lua index 8be3f6e..bedb03e 100644 --- a/anselme/parser/Source.lua +++ b/anselme/parser/Source.lua @@ -29,6 +29,13 @@ Source = class { self:increment(utf8.len(capture:match("[^\n]*$"))) return ... 
end, + consume_leading_whitespace = function(self, options, str) + if options.allow_newlines then + return self:consume(str:match("^([ \t\n]*)(.*)$")) + else + return self:consume(str:match("^([ \t]*)(.*)$")) + end + end, clone = function(self) return Source:new(self.name, self.line, self.position) diff --git a/anselme/parser/expression/block.lua b/anselme/parser/expression/block.lua index 844f883..a0de68d 100644 --- a/anselme/parser/expression/block.lua +++ b/anselme/parser/expression/block.lua @@ -3,7 +3,7 @@ local expression_to_ast = require("anselme.parser.expression.to_ast") local ast = require("anselme.ast") local PartialScope, Block, Flush, Call, Identifier = ast.PartialScope, ast.Block, ast.Flush, ast.Call, ast.Identifier -local function block(source, str) +local function block(source, options, str) local start_source = source:clone() if not str:match("^\n") then @@ -45,7 +45,7 @@ local function block(source, str) -- parse line local s, exp - s, exp, rem = pcall(expression_to_ast, source, line) + s, exp, rem = pcall(expression_to_ast, source, options, line) if not s then error(("invalid expression in block: %s"):format(exp), 0) end -- single implicit _: line was empty (e.g. 
single comment in the line) diff --git a/anselme/parser/expression/comment.lua b/anselme/parser/expression/comment.lua index be60168..237ef6f 100644 --- a/anselme/parser/expression/comment.lua +++ b/anselme/parser/expression/comment.lua @@ -5,7 +5,8 @@ comment = primary { match = function(self, str) return str:match("^%/%*") end, - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) + local limit_pattern = options.limit_pattern local rem = source:consume(str:match("^(%/%*)(.*)$")) local content_list = {} @@ -27,7 +28,7 @@ comment = primary { -- nested comment if rem:match("^%/%*") then local subcomment - subcomment, rem = comment:parse(source, rem, limit_pattern) + subcomment, rem = comment:parse(source, options, rem) table.insert(content_list, "/*") table.insert(content_list, subcomment) diff --git a/anselme/parser/expression/contextual/function_parameter.lua b/anselme/parser/expression/contextual/function_parameter.lua index 2cbe1bc..62049e3 100644 --- a/anselme/parser/expression/contextual/function_parameter.lua +++ b/anselme/parser/expression/contextual/function_parameter.lua @@ -13,25 +13,26 @@ return primary { match = function(self, str) return identifier:match(str) end, - parse = function(self, source, str, limit_pattern, no_default_value) + parse = function(self, source, options, str, no_default_value) local source_param = source:clone() -- name - local ident, rem = identifier:parse(source, str) + local ident, rem = identifier:parse(source, options, str) + rem = source:consume_leading_whitespace(options, rem) -- value check local value_check - if rem:match("^[ \t]*::") then - local scheck = source:consume(rem:match("^([ \t]*::[ \t]*)(.*)$")) - value_check, rem = expression_to_ast(source, scheck, limit_pattern, value_check_priority) + if rem:match("^::") then + local scheck = source:consume(rem:match("^(::)(.*)$")) + value_check, rem = expression_to_ast(source, options, scheck, value_check_priority) end -- 
default value local default if not no_default_value then - if rem:match("^[ \t]*=") then - local sdefault = source:consume(rem:match("^([ \t]*=[ \t]*)(.*)$")) - default, rem = expression_to_ast(source, sdefault, limit_pattern, assignment_priority) + if rem:match("^=") then + local sdefault = source:consume(rem:match("^(=)(.*)$")) + default, rem = expression_to_ast(source, options, sdefault, assignment_priority) end end diff --git a/anselme/parser/expression/contextual/function_parameter_no_default.lua b/anselme/parser/expression/contextual/function_parameter_no_default.lua index 381da8f..132c367 100644 --- a/anselme/parser/expression/contextual/function_parameter_no_default.lua +++ b/anselme/parser/expression/contextual/function_parameter_no_default.lua @@ -1,7 +1,7 @@ local function_parameter = require("anselme.parser.expression.contextual.function_parameter") return function_parameter { - parse = function(self, source, str, limit_pattern) - return function_parameter:parse(source, str, limit_pattern, true) + parse = function(self, source, options, str) + return function_parameter:parse(source, options, str, true) end } diff --git a/anselme/parser/expression/contextual/parameter_tuple.lua b/anselme/parser/expression/contextual/parameter_tuple.lua index 1a94b3f..ecd1048 100644 --- a/anselme/parser/expression/contextual/parameter_tuple.lua +++ b/anselme/parser/expression/contextual/parameter_tuple.lua @@ -9,37 +9,42 @@ return primary { match = function(self, str) return str:match("^%(") end, - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) local source_start = source:clone() local parameters = ParameterTuple:new() local rem = source:consume(str:match("^(%()(.*)$")) + rem = source:consume_leading_whitespace(options, rem) -- i would LOVE to reuse the regular list parsing code for this, but unfortunately the list parsing code -- itself depends on this and expect this to be available quite early, and it's ANNOYING - 
while not rem:match("^[ \t]*%)") do + while not rem:match("^%)") do -- parameter local func_param - func_param, rem = function_parameter:expect(source, rem, limit_pattern) + func_param, rem = function_parameter:expect(source, options, rem) + rem = source:consume_leading_whitespace(options, rem) -- next! comma separator - if not rem:match("^[ \t]*%)") then - if not rem:match("^[ \t]*,") then + if not rem:match("^%)") then + if not rem:match("^,") then error(("unexpected %q at end of argument list"):format(rem:match("^[^\n]*")), 0) end - rem = source:consume(rem:match("^([ \t]*,)(.*)$")) + rem = source:consume(rem:match("^(,)(.*)$")) + rem = source:consume_leading_whitespace(options, rem) end -- add parameters:insert(func_param) end - rem = rem:match("^[ \t]*%)(.*)$") + rem = source:consume(rem:match("^(%))(.*)$")) -- consume the ")" through source so position tracking stays in sync + rem = source:consume_leading_whitespace(options, rem) -- assigment param - if rem:match("^[ \t]*=") then - rem = source:consume(rem:match("^([ \t]*=[ \t]*)(.*)$")) + if rem:match("^=") then + rem = source:consume(rem:match("^(=)(.*)$")) + rem = source:consume_leading_whitespace(options, rem) local func_param - func_param, rem = function_parameter_no_default:expect(source, rem, limit_pattern) + func_param, rem = function_parameter_no_default:expect(source, options, rem) parameters:insert_assignment(func_param) end diff --git a/anselme/parser/expression/primary/anchor.lua b/anselme/parser/expression/primary/anchor.lua index 0bd3d9b..5799b44 100644 --- a/anselme/parser/expression/primary/anchor.lua +++ b/anselme/parser/expression/primary/anchor.lua @@ -13,12 +13,12 @@ return primary { return false end, - parse = function(self, source, str) + parse = function(self, source, options, str) local start_source = source:clone() local rem = source:consume(str:match("^(#)(.-)$")) local ident - ident, rem = identifier:parse(source, rem) + ident, rem = identifier:parse(source, options, rem) return Anchor:new(ident.name):set_source(start_source), rem end diff --git 
a/anselme/parser/expression/primary/block_identifier.lua b/anselme/parser/expression/primary/block_identifier.lua index 0252fb5..cc8587e 100644 --- a/anselme/parser/expression/primary/block_identifier.lua +++ b/anselme/parser/expression/primary/block_identifier.lua @@ -8,7 +8,7 @@ return primary { return str:match("^_") end, - parse = function(self, source, str) + parse = function(self, source, options, str) local source_start = source:clone() local rem = source:consume(str:match("^(_)(.-)$")) return Call:new(Identifier:new("_"), ArgumentTuple:new()):set_source(source_start), rem diff --git a/anselme/parser/expression/primary/function_definition.lua b/anselme/parser/expression/primary/function_definition.lua index fb9b9b2..f11009d 100644 --- a/anselme/parser/expression/primary/function_definition.lua +++ b/anselme/parser/expression/primary/function_definition.lua @@ -26,18 +26,19 @@ local function_parameter_maybe_parenthesis = function_parameter_no_default { return function_parameter_no_default:match(str) end end, - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) if str:match("^%(") then str = source:consume(str:match("^(%()(.*)$")) - local exp, rem = function_parameter_no_default:parse(source, str, limit_pattern) + local exp, rem = function_parameter_no_default:parse(source, options, str) + rem = source:consume_leading_whitespace(options, rem) - if not rem:match("^[ \t]*%)") then error(("unexpected %q at end of parenthesis"):format(rem:match("^[^\n]*")), 0) end - rem = source:consume(rem:match("^([ \t]*%))(.-)$")) + if not rem:match("^%)") then error(("unexpected %q at end of parenthesis"):format(rem:match("^[^\n]*")), 0) end + rem = source:consume(rem:match("^(%))(.-)$")) return exp, rem else - return function_parameter_no_default:parse(source, str, limit_pattern) + return function_parameter_no_default:parse(source, options, str) end end } @@ -46,7 +47,7 @@ local function_parameter_maybe_parenthesis = 
function_parameter_no_default { -- :$-parameter exp -- returns symbol, parameter_tuple, rem if success -- return nil otherwise -local function search_prefix_signature(modifiers, source, str, limit_pattern) +local function search_prefix_signature(modifiers, source, options, str) for _, pfx in ipairs(prefixes) do local prefix = pfx[1] local prefix_pattern = "[ \t]*"..escape(prefix).."[ \t]*" @@ -57,7 +58,7 @@ local function search_prefix_signature(modifiers, source, str, limit_pattern) -- parameters local parameter - parameter, rem = function_parameter_maybe_parenthesis:expect(source, rem, limit_pattern) + parameter, rem = function_parameter_maybe_parenthesis:expect(source, options, rem) local parameters = ParameterTuple:new() parameters:insert(parameter) @@ -72,10 +73,10 @@ end -- :$parameterA + parameterB exp -- returns symbol, parameter_tuple, rem if success -- return nil otherwise -local function search_infix_signature(modifiers, source, str, limit_pattern) +local function search_infix_signature(modifiers, source, options, str) if function_parameter_maybe_parenthesis:match(str) then local src = source:clone() -- operate on clone source since search success is not yet guaranteed - local parameter_a, rem = function_parameter_maybe_parenthesis:parse(src, str, limit_pattern) + local parameter_a, rem = function_parameter_maybe_parenthesis:parse(src, options, str) local parameters = ParameterTuple:new() parameters:insert(parameter_a) @@ -91,7 +92,7 @@ local function search_infix_signature(modifiers, source, str, limit_pattern) -- parameters if function_parameter_maybe_parenthesis:match(rem) then local parameter_b - parameter_b, rem = function_parameter_maybe_parenthesis:parse(src, rem, limit_pattern) + parameter_b, rem = function_parameter_maybe_parenthesis:parse(src, options, rem) parameters:insert(parameter_b) @@ -109,10 +110,10 @@ end -- :$parameter! 
exp -- returns symbol, parameter_tuple, rem if success -- return nil otherwise -local function search_suffix_signature(modifiers, source, str, limit_pattern) +local function search_suffix_signature(modifiers, source, options, str) if function_parameter_maybe_parenthesis:match(str) then local src = source:clone() -- operate on clone source since search success is not yet guaranteed - local parameter_a, rem = function_parameter_maybe_parenthesis:parse(src, str, limit_pattern) + local parameter_a, rem = function_parameter_maybe_parenthesis:parse(src, options, str) local parameters = ParameterTuple:new() parameters:insert(parameter_a) @@ -136,10 +137,10 @@ end -- :$identifier(parameter_tuple, ...) exp -- returns symbol, parameter_tuple, rem if success -- return nil otherwise -local function search_function_signature(modifiers, source, str, limit_pattern) +local function search_function_signature(modifiers, source, options, str) if identifier:match(str) then local name_source = source:clone() - local name, rem = identifier:parse(source, str, limit_pattern) + local name, rem = identifier:parse(source, options, str) -- name local symbol = name:to_symbol(modifiers):set_source(name_source) @@ -147,7 +148,7 @@ local function search_function_signature(modifiers, source, str, limit_pattern) -- parse eventual parameters local parameters if parameter_tuple:match(rem) then - parameters, rem = parameter_tuple:parse(source, rem) + parameters, rem = parameter_tuple:parse(source, options, rem) else parameters = ParameterTuple:new() end @@ -161,7 +162,7 @@ return primary { return str:match("^%::?&?@?%$") end, - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) local source_start = source:clone() local mod_const, mod_alias, mod_exported, rem = source:consume(str:match("^(%:(:?)(&?)(@?)%$)(.-)$")) @@ -174,16 +175,16 @@ return primary { -- search for a valid signature local symbol, parameters - local s, p, r = 
search_prefix_signature(modifiers, source, rem, limit_pattern) + local s, p, r = search_prefix_signature(modifiers, source, options, rem) if s then symbol, parameters, rem = s, p, r else - s, p, r = search_infix_signature(modifiers, source, rem, limit_pattern) + s, p, r = search_infix_signature(modifiers, source, options, rem) if s then symbol, parameters, rem = s, p, r else - s, p, r = search_suffix_signature(modifiers, source, rem, limit_pattern) + s, p, r = search_suffix_signature(modifiers, source, options, rem) if s then symbol, parameters, rem = s, p, r else - s, p, r = search_function_signature(modifiers, source, rem, limit_pattern) + s, p, r = search_function_signature(modifiers, source, options, rem) if s then symbol, parameters, rem = s, p, r end end end @@ -193,7 +194,7 @@ return primary { if symbol then -- parse expression local right - s, right, rem = pcall(expression_to_ast, source, rem, limit_pattern, operator_priority["$_"]) + s, right, rem = pcall(expression_to_ast, source, options, rem, operator_priority["$_"]) if not s then error(("invalid expression in function definition: %s"):format(right), 0) end -- return function diff --git a/anselme/parser/expression/primary/identifier.lua b/anselme/parser/expression/primary/identifier.lua index f14387c..416de41 100644 --- a/anselme/parser/expression/primary/identifier.lua +++ b/anselme/parser/expression/primary/identifier.lua @@ -27,7 +27,7 @@ return primary { return false end, - parse = function(self, source, str) + parse = function(self, source, options, str) for _, pat in ipairs(identifier_patterns) do if str:match("^"..pat) then local start_source = source:clone() diff --git a/anselme/parser/expression/primary/implicit_block_identifier.lua b/anselme/parser/expression/primary/implicit_block_identifier.lua index e8959f0..5fc8b2f 100644 --- a/anselme/parser/expression/primary/implicit_block_identifier.lua +++ b/anselme/parser/expression/primary/implicit_block_identifier.lua @@ -8,7 +8,7 @@ return primary 
{ return str:match("^\n") end, - parse = function(self, source, str) + parse = function(self, source, options, str) -- implicit _, do not consume the newline local r = Call:new(Identifier:new("_"), ArgumentTuple:new()):set_source(source) r.explicit = false diff --git a/anselme/parser/expression/primary/init.lua b/anselme/parser/expression/primary/init.lua index 84b0184..bf3e357 100644 --- a/anselme/parser/expression/primary/init.lua +++ b/anselme/parser/expression/primary/init.lua @@ -33,14 +33,14 @@ local primaries = { return { -- returns exp, rem if expression found -- returns nil if no expression found - search = function(self, source, str, limit_pattern) - str = source:consume(str:match("^([ \t]*)(.*)$")) + search = function(self, source, options, str) + str = source:consume_leading_whitespace(options, str) -- if there is a comment, restart the parsing after the comment ends - local c, c_rem = comment:search(source, str, limit_pattern) - if c then return self:search(source, c_rem, limit_pattern) end + local c, c_rem = comment:search(source, options, str) + if c then return self:search(source, options, c_rem) end -- search primary for _, primary in ipairs(primaries) do - local exp, rem = primary:search(source, str, limit_pattern) + local exp, rem = primary:search(source, options, str) if exp then return exp, rem end end end diff --git a/anselme/parser/expression/primary/number.lua b/anselme/parser/expression/primary/number.lua index b9da59d..18a2ef4 100644 --- a/anselme/parser/expression/primary/number.lua +++ b/anselme/parser/expression/primary/number.lua @@ -6,7 +6,7 @@ return primary { match = function(self, str) return str:match("^%d*%.%d+") or str:match("^%d+") end, - parse = function(self, source, str) + parse = function(self, source, options, str) local start_source = source:clone() local d, r = str:match("^(%d*%.%d+)(.*)$") if not d then diff --git a/anselme/parser/expression/primary/parenthesis.lua b/anselme/parser/expression/primary/parenthesis.lua 
index 7d9f802..c050daa 100644 --- a/anselme/parser/expression/primary/parenthesis.lua +++ b/anselme/parser/expression/primary/parenthesis.lua @@ -11,20 +11,24 @@ return primary { match = function(self, str) return str:match("^%(") end, - parse = function(self, source, str) + parse = function(self, source, options, str) local start_source = source:clone() + local opts = options:with{ limit_pattern = "%)", allow_newlines = true } + local rem = source:consume(str:match("^(%()(.*)$")) + rem = source:consume_leading_whitespace(opts, rem) local exp - if rem:match("^%s*%)") then + if rem:match("^%)") then exp = Nil:new() else local s - s, exp, rem = pcall(expression_to_ast, source, rem, "%)") + s, exp, rem = pcall(expression_to_ast, source, opts, rem) if not s then error("invalid expression inside parentheses: "..exp, 0) end - if not rem:match("^%s*%)") then error(("unexpected %q at end of parenthesis"):format(rem:match("^[^\n]*")), 0) end + rem = source:consume_leading_whitespace(opts, rem) + if not rem:match("^%)") then error(("unexpected %q at end of parenthesis"):format(rem:match("^[^\n]*")), 0) end end - rem = source:consume(rem:match("^(%s*%))(.*)$")) + rem = source:consume(rem:match("^(%))(.*)$")) return exp:set_source(start_source), rem end diff --git a/anselme/parser/expression/primary/prefix/function.lua b/anselme/parser/expression/primary/prefix/function.lua index 2984c63..a80afef 100644 --- a/anselme/parser/expression/primary/prefix/function.lua +++ b/anselme/parser/expression/primary/prefix/function.lua @@ -12,7 +12,7 @@ return prefix { operator = "$", priority = operator_priority["$_"], - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) local source_start = source:clone() local escaped = escape(self.operator) local rem = source:consume(str:match("^("..escaped..")(.*)$")) @@ -20,14 +20,14 @@ return prefix { -- parse eventual parameters local parameters if parameter_tuple:match(rem) then - parameters, rem = 
parameter_tuple:parse(source, rem) + parameters, rem = parameter_tuple:parse(source, options, rem) else parameters = ParameterTuple:new() end -- parse expression local s, right - s, right, rem = pcall(expression_to_ast, source, rem, limit_pattern, self.priority) + s, right, rem = pcall(expression_to_ast, source, options, rem, self.priority) if not s then error(("invalid expression after unop %q: %s"):format(self.operator, right), 0) end return Function:with_return_boundary(parameters, right):set_source(source_start), rem diff --git a/anselme/parser/expression/primary/prefix/prefix.lua b/anselme/parser/expression/primary/prefix/prefix.lua index 1a4212c..0d0206d 100644 --- a/anselme/parser/expression/primary/prefix/prefix.lua +++ b/anselme/parser/expression/primary/prefix/prefix.lua @@ -17,12 +17,12 @@ return primary { return str:match("^"..escaped) end, - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) local source_start = source:clone() local escaped = escape(self.operator) local sright = source:consume(str:match("^("..escaped..")(.*)$")) - local s, right, rem = pcall(expression_to_ast, source, sright, limit_pattern, self.priority) + local s, right, rem = pcall(expression_to_ast, source, options, sright, self.priority) if not s then error(("invalid expression after prefix operator %q: %s"):format(self.operator, right), 0) end return self:build_ast(right):set_source(source_start), rem diff --git a/anselme/parser/expression/primary/prefix/prefix_maybe_nil_right.lua b/anselme/parser/expression/primary/prefix/prefix_maybe_nil_right.lua index e4d907d..e75e0a5 100644 --- a/anselme/parser/expression/primary/prefix/prefix_maybe_nil_right.lua +++ b/anselme/parser/expression/primary/prefix/prefix_maybe_nil_right.lua @@ -6,12 +6,12 @@ local ast = require("anselme.ast") local Nil = ast.Nil return prefix { - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) local source_start = 
source:clone() local escaped = escape(self.operator) local sright = source:consume(str:match("^("..escaped..")(.*)$")) - local s, right, rem = pcall(expression_to_ast, source, sright, limit_pattern, self.priority) + local s, right, rem = pcall(expression_to_ast, source, options, sright, self.priority) if not s then return self:build_ast(Nil:new()):set_source(source_start), sright else diff --git a/anselme/parser/expression/primary/primary.lua b/anselme/parser/expression/primary/primary.lua index 016f128..5fce6e8 100644 --- a/anselme/parser/expression/primary/primary.lua +++ b/anselme/parser/expression/primary/primary.lua @@ -5,11 +5,11 @@ return class { -- returns exp, rem if expression found -- returns nil if no expression found - search = function(self, source, str, limit_pattern) + search = function(self, source, options, str) if not self:match(str) then return nil end - return self:parse(source, str, limit_pattern) + return self:parse(source, options, str) end, -- return bool -- (not needed if you redefined :search) @@ -18,15 +18,15 @@ return class { end, -- return AST, rem -- (not needed if you redefined :search) - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) error("unimplemented") end, -- class helpers -- -- return AST, rem - expect = function(self, source, str, limit_pattern) - local exp, rem = self:search(source, str, limit_pattern) + expect = function(self, source, options, str) + local exp, rem = self:search(source, options, str) if not exp then error(("expected %s but got %s"):format(self.type, str)) end return exp, rem end diff --git a/anselme/parser/expression/primary/string.lua b/anselme/parser/expression/primary/string.lua index 061b9f9..fd6ce21 100644 --- a/anselme/parser/expression/primary/string.lua +++ b/anselme/parser/expression/primary/string.lua @@ -27,7 +27,8 @@ return primary { match = function(self, str) return str:match("^"..self.start_pattern) end, - parse = function(self, source, str, 
limit_pattern) + parse = function(self, source, options, str) + local limit_pattern = options.limit_pattern local interpolation = self.interpolation:new() local stop_pattern = escape(self.stop_char) @@ -52,11 +53,13 @@ return primary { -- interpolated expression if rem:match("^%{") then + local opts = options:with { limit_pattern = "%}", allow_newlines = false } local ok, exp - ok, exp, rem = pcall(expression_to_ast, source, source:consume(rem:match("^(%{)(.*)$")), "%}") + ok, exp, rem = pcall(expression_to_ast, source, opts, source:consume(rem:match("^(%{)(.*)$"))) if not ok then error("invalid expression inside interpolation: "..exp, 0) end - if not rem:match("^[ \t]*%}") then error(("unexpected %q at end of interpolation"):format(rem:match("^[^\n]*")), 0) end - rem = source:consume(rem:match("^([ \t]*%})(.*)$")) + rem = source:consume_leading_whitespace(opts, rem) + if not rem:match("^%}") then error(("unexpected %q at end of interpolation"):format(rem:match("^[^\n]*")), 0) end + rem = source:consume(rem:match("^(%})(.*)$")) interpolation:insert(exp) -- escape sequence elseif rem:match("^\\") then diff --git a/anselme/parser/expression/primary/struct.lua b/anselme/parser/expression/primary/struct.lua index 5d3e59b..b6145d9 100644 --- a/anselme/parser/expression/primary/struct.lua +++ b/anselme/parser/expression/primary/struct.lua @@ -9,8 +9,8 @@ return primary { return str:match("^%{") end, - parse = function(self, source, str) - local l, rem = tuple:parse_tuple(source, str, "{", '}') + parse = function(self, source, options, str) + local l, rem = tuple:parse_tuple(source, options, str, "{", '}') return Struct:from_tuple(l), rem end diff --git a/anselme/parser/expression/primary/symbol.lua b/anselme/parser/expression/primary/symbol.lua index 92215e0..7e20b2f 100644 --- a/anselme/parser/expression/primary/symbol.lua +++ b/anselme/parser/expression/primary/symbol.lua @@ -14,7 +14,7 @@ return primary { return false end, - parse = function(self, source, str) + parse 
= function(self, source, options, str) local mod_const, mod_alias, mod_export, rem = source:consume(str:match("^(%:(:?)(&?)(@?))(.-)$")) local constant, alias, value_check_exp, exported @@ -25,13 +25,13 @@ return primary { -- name local ident - ident, rem = identifier:parse(source, rem) + ident, rem = identifier:parse(source, options, rem) -- value check local nil_val = Nil:new() if value_check:match(rem, 0, nil_val) then local exp - exp, rem = value_check:parse(source, rem, nil, 0, nil_val) + exp, rem = value_check:parse(source, options, rem, 0, nil_val) value_check_exp = exp.arguments.positional[2] end diff --git a/anselme/parser/expression/primary/text.lua b/anselme/parser/expression/primary/text.lua index a21f197..6414015 100644 --- a/anselme/parser/expression/primary/text.lua +++ b/anselme/parser/expression/primary/text.lua @@ -10,9 +10,9 @@ return string { allow_implicit_stop = true, interpolation = TextInterpolation, - parse = function(self, source, str, limit_pattern) + parse = function(self, source, options, str) local start_source = source:clone() - local interpolation, rem = string.parse(self, source, str, limit_pattern) + local interpolation, rem = string.parse(self, source, options, str) -- remove terminal space local last = interpolation.list[#interpolation.list] diff --git a/anselme/parser/expression/primary/tuple.lua b/anselme/parser/expression/primary/tuple.lua index cb77d56..b579400 100644 --- a/anselme/parser/expression/primary/tuple.lua +++ b/anselme/parser/expression/primary/tuple.lua @@ -12,28 +12,32 @@ return primary { return str:match("^%[") end, - parse = function(self, source, str) - return self:parse_tuple(source, str, "[", "]") + parse = function(self, source, options, str) + return self:parse_tuple(source, options, str, "[", "]") end, - parse_tuple = function(self, source, str, start_char, end_char) + parse_tuple = function(self, source, options, str, start_char, end_char) local start_source = source:clone() + local opts = options:with{ 
limit_pattern = escape(end_char), allow_newlines = true } -- limit_pattern is used as a Lua pattern, so pass the escaped end char (as before this change) + + local rem = source:consume(str:match("^("..escape(start_char)..")(.*)$")) + rem = source:consume_leading_whitespace(opts, rem) local end_match = escape(end_char) local l - if not rem:match("^[ \t]*"..end_match) then + if not rem:match("^"..end_match) then local s - s, l, rem = pcall(expression_to_ast, source, rem, end_match, nil) + s, l, rem = pcall(expression_to_ast, source, opts, rem) if not s then error("invalid expression in list: "..l, 0) end + rem = source:consume_leading_whitespace(opts, rem) end if not Tuple:is(l) or l.explicit then l = Tuple:new(l) end -- single or no element - if not rem:match("^[ \t]*"..end_match) then + if not rem:match("^"..end_match) then error(("unexpected %q at end of list"):format(rem:match("^[^\n]*")), 0) end - rem = source:consume(rem:match("^([ \t]*"..end_match..")(.*)$")) + rem = source:consume(rem:match("^("..end_match..")(.*)$")) l.explicit = true return l:set_source(start_source), rem diff --git a/anselme/parser/expression/secondary/infix/call.lua b/anselme/parser/expression/secondary/infix/call.lua index e7e8123..2f4b8f5 100644 --- a/anselme/parser/expression/secondary/infix/call.lua +++ b/anselme/parser/expression/secondary/infix/call.lua @@ -14,6 +14,7 @@ return infix { match = function(self, str, current_priority, primary) local escaped = escape(self.operator) + -- TODO: doesn't support newline between ! 
and identifier, even in multiline expression return self.priority > current_priority and str:match("^"..escaped) and identifier:match(str:match("^"..escaped.."[ \t]*(.-)$")) end, diff --git a/anselme/parser/expression/secondary/infix/implicit_multiplication.lua b/anselme/parser/expression/secondary/infix/implicit_multiplication.lua index 61a95f8..f85816a 100644 --- a/anselme/parser/expression/secondary/infix/implicit_multiplication.lua +++ b/anselme/parser/expression/secondary/infix/implicit_multiplication.lua @@ -15,9 +15,9 @@ return infix { return self.priority > current_priority and identifier:match(str) end, - parse = function(self, source, str, limit_pattern, current_priority, primary) + parse = function(self, source, options, str, current_priority, primary) local start_source = source:clone() - local right, rem = identifier:parse(source, str, limit_pattern) + local right, rem = identifier:parse(source, options, str) local r = Call:new(Identifier:new(self.identifier), ArgumentTuple:new(primary, right)):set_source(start_source) r.explicit = false return r, rem diff --git a/anselme/parser/expression/secondary/infix/infix.lua b/anselme/parser/expression/secondary/infix/infix.lua index a96bf35..f2b8b91 100644 --- a/anselme/parser/expression/secondary/infix/infix.lua +++ b/anselme/parser/expression/secondary/infix/infix.lua @@ -17,12 +17,12 @@ return secondary { end, -- return AST, rem - parse = function(self, source, str, limit_pattern, current_priority, primary) + parse = function(self, source, options, str, current_priority, primary) local start_source = source:clone() local escaped = escape(self.operator) local sright = source:consume(str:match("^("..escaped..")(.*)$")) - local s, right, rem = pcall(expression_to_ast, source, sright, limit_pattern, self.priority) + local s, right, rem = pcall(expression_to_ast, source, options, sright, self.priority) if not s then error(("invalid expression after infix operator %q: %s"):format(self.operator, right), 0) end 
return self:build_ast(primary, right):set_source(start_source), rem diff --git a/anselme/parser/expression/secondary/infix/infix_or_suffix.lua b/anselme/parser/expression/secondary/infix/infix_or_suffix.lua index 20967d2..41949e4 100644 --- a/anselme/parser/expression/secondary/infix/infix_or_suffix.lua +++ b/anselme/parser/expression/secondary/infix/infix_or_suffix.lua @@ -8,23 +8,23 @@ local expression_to_ast = require("anselme.parser.expression.to_ast") return infix { -- returns exp, rem if expression found -- returns nil if no expression found - search = function(self, source, str, limit_pattern, current_priority, operating_on_primary) + search = function(self, source, options, str, current_priority, operating_on_primary) if not self:match(str, current_priority, operating_on_primary) then return nil end - return self:maybe_parse(source, str, limit_pattern, current_priority, operating_on_primary) + return self:maybe_parse(source, options, str, current_priority, operating_on_primary) end, parse = function() error("no guaranteed parse for this operator") end, -- return AST, rem -- return nil - maybe_parse = function(self, source, str, limit_pattern, current_priority, primary) + maybe_parse = function(self, source, options, str, current_priority, primary) local start_source = source:clone() local escaped = escape(self.operator) local sright = source:consume(str:match("^("..escaped..")(.*)$")) - local s, right, rem = pcall(expression_to_ast, source, sright, limit_pattern, self.priority) + local s, right, rem = pcall(expression_to_ast, source, options, sright, self.priority) if not s then return nil end return self:build_ast(primary, right):set_source(start_source), rem diff --git a/anselme/parser/expression/secondary/infix/tuple.lua b/anselme/parser/expression/secondary/infix/tuple.lua index 451706e..13a2798 100644 --- a/anselme/parser/expression/secondary/infix/tuple.lua +++ b/anselme/parser/expression/secondary/infix/tuple.lua @@ -13,19 +13,20 @@ return infix { 
priority = operator_priority["_,_"], -- reminder: this :parse method is also called from primary.list as an helper to build list bracket litterals - parse = function(self, source, str, limit_pattern, current_priority, primary) + parse = function(self, source, options, str, current_priority, primary) local start_source = source:clone() local l = Tuple:new() l:insert(primary) local escaped = escape(self.operator) local rem = str - while rem:match("^[ \t]*"..escaped) do - rem = source:consume(rem:match("^([ \t]*"..escaped..")(.*)$")) + while rem:match("^"..escaped) do + rem = source:consume(rem:match("^("..escaped..")(.*)$")) local s, right - s, right, rem = pcall(expression_to_ast, source, rem, limit_pattern, self.priority) + s, right, rem = pcall(expression_to_ast, source, options, rem, self.priority) if not s then error(("invalid expression after binop %q: %s"):format(self.operator, right), 0) end + rem = source:consume_leading_whitespace(options, rem) l:insert(right) end diff --git a/anselme/parser/expression/secondary/init.lua b/anselme/parser/expression/secondary/init.lua index c06e6ac..3106aa3 100644 --- a/anselme/parser/expression/secondary/init.lua +++ b/anselme/parser/expression/secondary/init.lua @@ -51,18 +51,18 @@ end return { -- returns exp, rem if expression found -- returns nil if no expression found - search = function(self, source, str, limit_pattern, current_priority, primary) - str = source:consume(str:match("^([ \t]*)(.*)$")) + search = function(self, source, options, str, current_priority, primary) + str = source:consume_leading_whitespace( options,str) -- if there is a comment, restart the parsing after the comment ends - local c, c_rem = comment:search(source, str, limit_pattern) + local c, c_rem = comment:search(source, options, str) if c then - local ce, ce_rem = self:search(source, c_rem, limit_pattern, current_priority, primary) + local ce, ce_rem = self:search(source, options, c_rem, current_priority, primary) if ce then return ce, ce_rem 
else return primary, c_rem end -- noop end -- search secondary for _, secondary in ipairs(secondaries) do - local exp, rem = secondary:search(source, str, limit_pattern, current_priority, primary) + local exp, rem = secondary:search(source, options, str, current_priority, primary) if exp then return exp, rem end end end diff --git a/anselme/parser/expression/secondary/secondary.lua b/anselme/parser/expression/secondary/secondary.lua index d811f7a..fc6fb2f 100644 --- a/anselme/parser/expression/secondary/secondary.lua +++ b/anselme/parser/expression/secondary/secondary.lua @@ -5,11 +5,11 @@ return class { -- returns exp, rem if expression found -- returns nil if no expression found - search = function(self, source, str, limit_pattern, current_priority, operating_on_primary) + search = function(self, source, options, str, current_priority, operating_on_primary) if not self:match(str, current_priority, operating_on_primary) then return nil end - return self:parse(source, str, limit_pattern, current_priority, operating_on_primary) + return self:parse(source, options, str, current_priority, operating_on_primary) end, -- return bool -- (not needed if you redefined :search) @@ -19,15 +19,15 @@ return class { -- return AST, rem -- (not needed if you redefined :search) -- assumes that :match was checked before, and can not return nil (may error though) - parse = function(self, source, str, limit_pattern, current_priority, operating_on_primary) + parse = function(self, source, options, str, current_priority, operating_on_primary) error("unimplemented") end, -- class helpers -- -- return AST, rem - expect = function(self, source, str, limit_pattern, current_priority, operating_on_primary) - local exp, rem = self:search(source, str, limit_pattern, current_priority, operating_on_primary) + expect = function(self, source, options, str, current_priority, operating_on_primary) + local exp, rem = self:search(source, options, str, current_priority, operating_on_primary) if not exp 
then error(("expected %s but got %s"):format(self.type, str)) end return exp, rem end diff --git a/anselme/parser/expression/secondary/suffix/call.lua b/anselme/parser/expression/secondary/suffix/call.lua index 410e5c0..e3fac7e 100644 --- a/anselme/parser/expression/secondary/suffix/call.lua +++ b/anselme/parser/expression/secondary/suffix/call.lua @@ -17,17 +17,17 @@ return secondary { return self.priority > current_priority and (parenthesis:match(str) or tuple:match(str) or struct:match(str)) end, - parse = function(self, source, str, limit_pattern, current_priority, primary) + parse = function(self, source, options, str, current_priority, primary) local start_source = source:clone() local args = ArgumentTuple:new() local exp, rem if parenthesis:match(str) then - exp, rem = parenthesis:parse(source, str, limit_pattern) + exp, rem = parenthesis:parse(source, options, str) if Nil:is(exp) then - if str:match("^%([ \t]*%([ \t]*%)[ \t]*%)") then -- special case: single nil argument + if str:match("^%([ \t\n]*%([ \t\n]*%)[ \t\n]*%)") then -- special case: single nil argument exp = Tuple:new(Nil:new()) else -- no arguments exp = Tuple:new() @@ -36,10 +36,10 @@ return secondary { exp = Tuple:new(exp) end elseif tuple:match(str) then - exp, rem = tuple:parse(source, str, limit_pattern) + exp, rem = tuple:parse(source, options, str) exp = Tuple:new(exp) else - exp, rem = struct:parse(source, str, limit_pattern) + exp, rem = struct:parse(source, options, str) exp = Tuple:new(exp) end diff --git a/anselme/parser/expression/secondary/suffix/suffix.lua b/anselme/parser/expression/secondary/suffix/suffix.lua index e8d8e00..80a4601 100644 --- a/anselme/parser/expression/secondary/suffix/suffix.lua +++ b/anselme/parser/expression/secondary/suffix/suffix.lua @@ -16,7 +16,7 @@ return secondary { return self.priority > current_priority and str:match("^"..escaped) end, - parse = function(self, source, str, limit_pattern, current_priority, primary) + parse = function(self, source, 
options, str, current_priority, primary) local start_source = source:clone() local escaped = escape(self.operator) diff --git a/anselme/parser/expression/to_ast.lua b/anselme/parser/expression/to_ast.lua index af2597c..b96cb5c 100644 --- a/anselme/parser/expression/to_ast.lua +++ b/anselme/parser/expression/to_ast.lua @@ -4,11 +4,11 @@ local primary, secondary -- parse an expression, starting from a secondary element operating on operating_on_primary -- returns expr, remaining -local function from_secondary(source, s, limit_pattern, current_priority, operating_on_primary) +local function from_secondary(source, options, s, current_priority, operating_on_primary) current_priority = current_priority or 0 -- secondary elements - local exp, rem = secondary:search(source, s, limit_pattern, current_priority, operating_on_primary) - if exp then return from_secondary(source, rem, limit_pattern, current_priority, exp) end + local exp, rem = secondary:search(source, options, s, current_priority, operating_on_primary) + if exp then return from_secondary(source, options, rem, current_priority, exp) end -- nothing to apply on primary return operating_on_primary, s end @@ -18,11 +18,11 @@ end -- limit_pattern: set to a string pattern that will trigger the end of elements that would otherwise consume everything until end-of-line (pattern is not consumed) -- fallback_exp: if no primary expression can be found, will return this instead. Used to avoid raising an error where an empty or comment-only expression is allowed. 
-- return expr, remaining -local function expression_to_ast(source, s, limit_pattern, current_priority) +local function expression_to_ast(source, options, s, current_priority) current_priority = current_priority or 0 -- primary elements - local exp, rem = primary:search(source, s, limit_pattern) - if exp then return from_secondary(source, rem, limit_pattern, current_priority, exp) end + local exp, rem = primary:search(source, options, s) + if exp then return from_secondary(source, options, rem, current_priority, exp) end -- no valid primary expression error(("no valid expression after %q"):format(s), 0) end @@ -33,7 +33,7 @@ primary = require("anselme.parser.expression.primary") secondary = require("anselme.parser.expression.secondary") -- return expr, remaining -return function(source, s, limit_pattern, current_priority, operating_on_primary) - if operating_on_primary then return from_secondary(source, s, limit_pattern, current_priority, operating_on_primary) - else return expression_to_ast(source, s, limit_pattern, current_priority) end +return function(source, options, s, current_priority, operating_on_primary) + if operating_on_primary then return from_secondary(source, options, s, current_priority, operating_on_primary) + else return expression_to_ast(source, options, s, current_priority) end end diff --git a/anselme/parser/init.lua b/anselme/parser/init.lua index 0eb73d6..ce2694b 100644 --- a/anselme/parser/init.lua +++ b/anselme/parser/init.lua @@ -1,5 +1,6 @@ local block = require("anselme.parser.expression.block") local Source = require("anselme.parser.Source") +local Options = require("anselme.parser.Options") local function expect_end(exp, rem) if rem:match("[^%s]") then @@ -11,5 +12,5 @@ end -- parse code (string) with the associated source (Source) -- the returned AST tree is stateless and can be stored/evaluated/etc as you please return function(code, source) - return expect_end(block(Source:new(source, 1, 1), code)) + return 
expect_end(block(Source:new(source, 1, 1), Options:new(), code)) end diff --git a/anselme/state/ScopeStack.lua b/anselme/state/ScopeStack.lua index 7018012..8f48204 100644 --- a/anselme/state/ScopeStack.lua +++ b/anselme/state/ScopeStack.lua @@ -6,6 +6,7 @@ local ast = require("anselme.ast") local to_anselme = require("anselme.common.to_anselme") local unpack = table.unpack or unpack +local Source, Options local LuaCall, Environment, Node local parameter_tuple = require("anselme.parser.expression.contextual.parameter_tuple") @@ -50,10 +51,10 @@ local ScopeStack = class { -- if `raw_mode` is true, no anselme-to/from-lua conversion will be performed in the function -- the function will receive the state followed by AST nodes as arguments, and is expected to return an AST node define_lua = function(self, name, value, func, raw_mode) - local source = require("anselme.parser.Source"):new() - local sym = symbol:parse(source, (":%s"):format(name)) + local source, options = Source:new(), Options:new() + local sym = symbol:parse(source, options, (":%s"):format(name)) if func then - local parameters = parameter_tuple:parse(source, value) + local parameters = parameter_tuple:parse(source, options, value) if not raw_mode then local original_func = func func = function(state, ...) @@ -151,5 +152,7 @@ local ScopeStack = class { package.loaded[...] = ScopeStack LuaCall, Environment, Node = ast.LuaCall, ast.Environment, ast.abstract.Node +Source = require("anselme.parser.Source") +Options = require("anselme.parser.Options") return ScopeStack diff --git a/ideas.md b/ideas.md index 9e9fd81..38b26bd 100644 --- a/ideas.md +++ b/ideas.md @@ -10,12 +10,6 @@ Documentation: --- -Translation. - -Do some more fancy scope work to allow the translation to access variables defined in the translation file? - ---- - Standard library. * Text and string manipulation would make sense, but that would require a full UTF-8/Unicode support library like https://github.com/starwing/luautf8. 
@@ -27,8 +21,22 @@ Standard library. Default arguments and initial variables values should pass the value check associated with the variable / parameter. Issue: dispatch is decided before evaluating default values. +--- + +Multiline string and comments? + +--- + +Error on undefined struct/table key? I copied the Lua behavior but maybe not useful here. + # Can be done later +Translation. + +Do some more fancy scope work to allow the translation to access variables defined in the translation file? + +--- + Server API. To be able to use Anselme in another language, it would be nice to be able to access it over some form of IPC. @@ -45,6 +53,10 @@ Could be reused for exception handling or other purposes if accessible by the us --- +Custom function for building text/string interpolation. + +--- + Reduce the number of AST node types ; try to merge similar node and make simpler individuals nodes if possible by composing them. Won't help with performance but make me feel better, and easier to extend. Anselme should be more minimal is possible. @@ -56,15 +68,6 @@ To draw a graph of branches, keep track of used variables and prune the unused o --- -Multiline expressions. - -* add the ability to escape newlines - Issue: need a way to correctly track line numbers, the current parser assumes one expression = one source -* allow some expressions to run over several lines (the ones that expect a closing token, like paren/list/structs) - Issue: the line and expression parsing is completely separate - ---- - Performance: * the most terribly great choice is the overload with parameter filtering. 
diff --git a/test/results/brace multiline.ans b/test/results/brace multiline.ans new file mode 100644 index 0000000..f8ea667 --- /dev/null +++ b/test/results/brace multiline.ans @@ -0,0 +1,13 @@ +--# run #-- +--- text --- +| {}"" {}"{1:1, 2:2, 3:4, 4:6}" {}"" | +--- text --- +| {}"" {}"{1:1, 2:2, 3:4, 4:6}" {}"" | +--- text --- +| {}"" {}"{}" {}"" | +--- text --- +| {}"" {}"{1:1, 2:2, 3:4, 4:3, 5:9, 6:6}" {}"" | +--- return --- +() +--# saved #-- +{} \ No newline at end of file diff --git a/test/results/bracket multiline.ans b/test/results/bracket multiline.ans new file mode 100644 index 0000000..028cec2 --- /dev/null +++ b/test/results/bracket multiline.ans @@ -0,0 +1,13 @@ +--# run #-- +--- text --- +| {}"" {}"[1, 2, 4, 6]" {}"" | +--- text --- +| {}"" {}"[1, 2, 4, 6]" {}"" | +--- text --- +| {}"" {}"[]" {}"" | +--- text --- +| {}"" {}"[1, 2, 4, 3, 9, 6]" {}"" | +--- return --- +() +--# saved #-- +{} \ No newline at end of file diff --git a/test/results/parentheses multiline.ans b/test/results/parentheses multiline.ans new file mode 100644 index 0000000..ec0542a --- /dev/null +++ b/test/results/parentheses multiline.ans @@ -0,0 +1,13 @@ +--# run #-- +--- text --- +| {}"" {}"[1, 2, 4, 6]" {}"" | +--- text --- +| {}"" {}"[1, 2, 4, 6]" {}"" | +--- text --- +| {}"" {}"()" {}"" | +--- text --- +| {}"" {}"[1, 2, 4, 3, 9, 6]" {}"" | +--- return --- +() +--# saved #-- +{} \ No newline at end of file diff --git a/test/tests/brace multiline.ans b/test/tests/brace multiline.ans new file mode 100644 index 0000000..21ac2af --- /dev/null +++ b/test/tests/brace multiline.ans @@ -0,0 +1,15 @@ +|{{1,2,4, +6}} + +|{{ +1,2,4 +,6}} + +|{{ +}} + +|{{1,2,4, +/* hey */3, + 9 + /* hoy + ,6}} diff --git a/test/tests/bracket multiline.ans b/test/tests/bracket multiline.ans new file mode 100644 index 0000000..f9c957c --- /dev/null +++ b/test/tests/bracket multiline.ans @@ -0,0 +1,15 @@ +|{[1,2,4, +6]} + +|{[ +1,2,4 +,6]} + +|{[ +]} + +|{[1,2,4, +/* hey */3, + 9 + /* hoy + ,6]} diff --git 
a/test/tests/parentheses multiline.ans b/test/tests/parentheses multiline.ans new file mode 100644 index 0000000..d046b9e --- /dev/null +++ b/test/tests/parentheses multiline.ans @@ -0,0 +1,15 @@ +|{(1,2,4, +6)} + +|{( +1,2,4 +,6)} + +|{( +)} + +|{(1,2,4, +/* hey */3, + 9 + /* hoy + ,6)}