mirror of
https://github.com/Reuh/candran.git
synced 2025-10-27 17:59:30 +00:00
* Fixed HORRIBLE parsing bugs with short functions and right assignemnt operators * Allowed to omit then, do and end for some statements * Fixed hexa numbers parsing * Run the Lua 5.3 test suite through Candran, everything that should work worked! Yay! Lacks tests and README
686 lines
28 KiB
Lua
686 lines
28 KiB
Lua
--[[
|
|
This module implements a parser for Lua 5.3 with LPeg,
|
|
and generates an Abstract Syntax Tree that is similar to the one generated by Metalua.
|
|
For more information about Metalua, please, visit:
|
|
https://github.com/fab13n/metalua-parser
|
|
|
|
block: { stat* }
|
|
|
|
stat:
|
|
`Do{ stat* }
|
|
| `Set{ {lhs+} (opid? = opid?)? {expr+} } -- lhs1, lhs2... op=op e1, e2...
|
|
| `While{ expr block } -- while e do b end
|
|
| `Repeat{ block expr } -- repeat b until e
|
|
| `If{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
|
|
| `Fornum{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
|
|
| `Forin{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
|
|
| `Local{ {ident+} {expr+}? } -- local i1, i2... = e1, e2...
|
|
| `Let{ {ident+} {expr+}? } -- let i1, i2... = e1, e2...
|
|
| `Localrec{ {ident} {expr} } -- only used for 'local function'
|
|
| `Goto{ <string> } -- goto str
|
|
| `Label{ <string> } -- ::str::
|
|
| `Return{ <expr*> } -- return e1, e2...
|
|
| `Break -- break
|
|
| `Push{ <expr*> } -- push e1, e2...
|
|
| `Continue -- continue
|
|
| apply
|
|
|
|
expr:
|
|
`Nil
|
|
| `Dots
|
|
| `Boolean{ <boolean> }
|
|
| `Number{ <string> } -- we don't convert it to a number, to avoid losing precision when tostring()-ing it later
|
|
| `String{ <string> }
|
|
| `Function{ { ( `ParPair{ Id expr } | `Id{ <string> } )* `Dots? } block }
|
|
| `Table{ ( `Pair{ expr expr } | expr )* }
|
|
| `Op{ opid expr expr? }
|
|
| `Paren{ expr } -- significant to cut multiple values returns
|
|
| `TableCompr{ block }
|
|
| statexpr
|
|
| apply
|
|
| lhs
|
|
|
|
statexpr:
|
|
`DoExpr{ stat* }
|
|
| `WhileExpr{ expr block } -- while e do b end
|
|
| `RepeatExpr{ block expr } -- repeat b until e
|
|
| `IfExpr{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
|
|
| `FornumExpr{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
|
|
| `ForinExpr{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
|
|
|
|
apply:
|
|
`Call{ expr expr* }
|
|
| `Invoke{ expr `String{ <string> } expr* }
|
|
|
|
lhs: `Id{ <string> } | `Index{ expr expr }
|
|
|
|
opid: -- includes additional operators from Lua 5.3 and all relational operators
|
|
'add' | 'sub' | 'mul' | 'div'
|
|
| 'idiv' | 'mod' | 'pow' | 'concat'
|
|
| 'band' | 'bor' | 'bxor' | 'shl' | 'shr'
|
|
| 'eq' | 'ne' | 'lt' | 'gt' | 'le' | 'ge'
|
|
| 'and' | 'or' | 'unm' | 'len' | 'bnot' | 'not'
|
|
]]
|
|
|
|
local lpeg = require "lpeglabel"
|
|
|
|
lpeg.locale(lpeg)
|
|
|
|
local P, S, V = lpeg.P, lpeg.S, lpeg.V
|
|
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
|
|
local Cf, Cg, Cmt, Cp, Cs, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
|
|
local Rec, T = lpeg.Rec, lpeg.T
|
|
|
|
local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
|
|
local xdigit = lpeg.xdigit
|
|
local space = lpeg.space
|
|
|
|
|
|
-- error message auxiliary functions
|
|
|
|
-- Table of every syntax error the parser can report.
-- Each entry is { name, message }. The POSITION of an entry in this list is
-- the numeric label thrown by `throw` (via lpeg.T) and looked up again in
-- parser.parse, so entries must never be reordered; only append new ones.
local labels = {
  { "ErrExtra", "unexpected character(s), expected EOF" },
  { "ErrInvalidStat", "unexpected token, invalid start of statement" },

  { "ErrEndIf", "expected 'end' to close the if statement" },
  { "ErrExprIf", "expected a condition after 'if'" },
  { "ErrThenIf", "expected 'then' after the condition" },
  { "ErrExprEIf", "expected a condition after 'elseif'" },
  { "ErrThenEIf", "expected 'then' after the condition" },

  { "ErrEndDo", "expected 'end' to close the do block" },
  { "ErrExprWhile", "expected a condition after 'while'" },
  { "ErrDoWhile", "expected 'do' after the condition" },
  { "ErrEndWhile", "expected 'end' to close the while loop" },
  { "ErrUntilRep", "expected 'until' at the end of the repeat loop" },
  { "ErrExprRep", "expected a condition after 'until'" }, -- was "a conditions" (grammar fix)

  { "ErrForRange", "expected a numeric or generic range after 'for'" },
  { "ErrEndFor", "expected 'end' to close the for loop" },
  { "ErrExprFor1", "expected a starting expression for the numeric range" },
  { "ErrCommaFor", "expected ',' to split the start and end of the range" },
  { "ErrExprFor2", "expected an ending expression for the numeric range" },
  { "ErrExprFor3", "expected a step expression for the numeric range after ','" },
  { "ErrInFor", "expected '=' or 'in' after the variable(s)" },
  { "ErrEListFor", "expected one or more expressions after 'in'" },
  { "ErrDoFor", "expected 'do' after the range of the for loop" },

  { "ErrDefLocal", "expected a function definition or assignment after local" },
  { "ErrDefLet", "expected a function definition or assignment after let" },
  { "ErrNameLFunc", "expected a function name after 'function'" },
  { "ErrEListLAssign", "expected one or more expressions after '='" },
  { "ErrEListAssign", "expected one or more expressions after '='" },

  { "ErrFuncName", "expected a function name after 'function'" },
  { "ErrNameFunc1", "expected a function name after '.'" },
  { "ErrNameFunc2", "expected a method name after ':'" },
  { "ErrOParenPList", "expected '(' for the parameter list" },
  { "ErrCParenPList", "expected ')' to close the parameter list" },
  { "ErrEndFunc", "expected 'end' to close the function body" },
  { "ErrParList", "expected a variable name or '...' after ','" },

  { "ErrLabel", "expected a label name after '::'" },
  { "ErrCloseLabel", "expected '::' after the label" },
  { "ErrGoto", "expected a label after 'goto'" },
  { "ErrRetList", "expected an expression after ',' in the return statement" },

  { "ErrVarList", "expected a variable name after ','" },
  { "ErrExprList", "expected an expression after ','" },

  { "ErrOrExpr", "expected an expression after 'or'" },
  { "ErrAndExpr", "expected an expression after 'and'" },
  { "ErrRelExpr", "expected an expression after the relational operator" },
  { "ErrBOrExpr", "expected an expression after '|'" },
  { "ErrBXorExpr", "expected an expression after '~'" },
  { "ErrBAndExpr", "expected an expression after '&'" },
  { "ErrShiftExpr", "expected an expression after the bit shift" },
  { "ErrConcatExpr", "expected an expression after '..'" },
  { "ErrAddExpr", "expected an expression after the additive operator" },
  { "ErrMulExpr", "expected an expression after the multiplicative operator" },
  { "ErrUnaryExpr", "expected an expression after the unary operator" },
  { "ErrPowExpr", "expected an expression after '^'" },

  { "ErrExprParen", "expected an expression after '('" },
  { "ErrCParenExpr", "expected ')' to close the expression" },
  { "ErrNameIndex", "expected a field name after '.'" },
  { "ErrExprIndex", "expected an expression after '['" },
  { "ErrCBracketIndex", "expected ']' to close the indexing expression" },
  { "ErrNameMeth", "expected a method name after ':'" },
  { "ErrMethArgs", "expected some arguments for the method call (or '()')" },

  { "ErrArgList", "expected an expression after ',' in the argument list" },
  { "ErrCParenArgs", "expected ')' to close the argument list" },

  { "ErrCBraceTable", "expected '}' to close the table constructor" },
  { "ErrEqField", "expected '=' after the table key" },
  { "ErrExprField", "expected an expression after '='" },
  { "ErrExprFKey", "expected an expression after '[' for the table key" },
  { "ErrCBracketFKey", "expected ']' to close the table key" },

  { "ErrCBracketTableCompr", "expected ']' to close the table comprehension" },

  { "ErrDigitHex", "expected one or more hexadecimal digits after '0x'" },
  { "ErrDigitDeci", "expected one or more digits after the decimal point" },
  { "ErrDigitExpo", "expected one or more digits for the exponent" },

  { "ErrQuote", "unclosed string" },
  { "ErrHexEsc", "expected exactly two hexadecimal digits after '\\x'" },
  { "ErrOBraceUEsc", "expected '{' after '\\u'" },
  { "ErrDigitUEsc", "expected one or more hexadecimal digits for the UTF-8 code point" },
  { "ErrCBraceUEsc", "expected '}' after the code point" },
  { "ErrEscSeq", "invalid escape sequence" },
  { "ErrCloseLStr", "unclosed long string" },
}
|
|
|
|
-- Build the pattern that throws the labeled failure named "Err" .. label.
-- The thrown label number is the entry's position in `labels`.
-- Raises a Lua error if no entry of `labels` carries that name.
local function throw(label)
  local fullName = "Err" .. label
  for index, entry in ipairs(labels) do
    local name = entry[1]
    if name == fullName then
      return T(index)
    end
  end

  error("Label not found: " .. fullName)
end
|
|
|
|
-- Match `patt`, or throw the error labeled `label` when it does not match.
local function expect (patt, label)
  local failure = throw(label)
  return patt + failure
end
|
|
|
|
|
|
-- regular combinators and auxiliary functions
|
|
|
|
-- Match `patt` followed by the grammar's "Skip" rule.
local function token (patt)
  local skip = V"Skip"
  return patt * skip
end
|
|
|
|
-- Token for a symbol: `str` is converted with lpeg.P and wrapped by `token`.
local function sym (str)
  local literal = P(str)
  return token(literal)
end
|
|
|
|
-- Token for a keyword: the literal `str` must not be directly followed by
-- something matching the "IdRest" rule.
local function kw (str)
  local notIdentifier = -V"IdRest"
  local word = P(str) * notIdentifier
  return token(word)
end
|
|
|
|
-- Capture `patt` into a table that also carries two named fields:
-- `pos` (the position capture taken before `patt`) and `tag` (the given tag).
local function tagC (tag, patt)
  local position = Cg(Cp(), "pos")
  local tagName = Cg(Cc(tag), "tag")
  return Ct(position * tagName * patt)
end
|
|
|
|
-- Build an `Op` node for a unary operator: { op, operand }, positioned at the
-- operand's `pos`.
local function unaryOp (op, e)
  local node = { op, e }
  node.tag = "Op"
  node.pos = e.pos
  return node
end
|
|
|
|
-- Build an `Op` node for a binary operator: { op, left, right }, positioned at
-- the left operand's `pos`. When no operator is given, `e1` is returned as is.
local function binaryOp (e1, op, e2)
  if op then
    return { tag = "Op", pos = e1.pos, op, e1, e2 }
  end
  return e1
end
|
|
|
|
-- One `patt` followed by zero or more grouped (`sep` `patt`) repetitions.
-- When `label` is given, a missing `patt` after a separator throws that error.
local function sepBy (patt, sep, label)
  local item = patt
  if label then
    item = expect(patt, label)
  end
  return patt * Cg(sep * item)^0
end
|
|
|
|
-- Operands `patt` separated by operator pattern `sep`, folded with `binaryOp`.
local function chainOp (patt, sep, label)
  local chain = sepBy(patt, sep, label)
  return Cf(chain, binaryOp)
end
|
|
|
|
-- `sepBy` specialised to the "," symbol as separator.
local function commaSep (patt, label)
  local comma = sym(",")
  return sepBy(patt, comma, label)
end
|
|
|
|
-- Retag the given node as "Do" (in place) and return it.
local function tagDo (block)
  block["tag"] = "Do"
  return block
end
|
|
|
|
-- Normalise a function statement node: wrap its two children (name and
-- function) in single-element lists. When the name is flagged `is_method`,
-- an `Id "self"` node is first prepended to the function's parameter list.
local function fixFuncStat (func)
  local name, body = func[1], func[2]
  if name.is_method then
    local params = body[1]
    table.insert(params, 1, { tag = "Id", "self" })
  end
  func[1], func[2] = { name }, { body }
  return func
end
|
|
|
|
-- Append the `dots` node (if any) to the parameter list and return the list.
local function addDots (params, dots)
  if not dots then
    return params
  end
  table.insert(params, dots)
  return params
end
|
|
|
|
-- Build an `Index` node { t, index } positioned at `t.pos`.
local function insertIndex (t, index)
  local node = { t, index }
  node.tag = "Index"
  node.pos = t.pos
  return node
end
|
|
|
|
-- When a method name is given, build an `Index` node { t, method } flagged
-- with `is_method = true`; otherwise return `t` unchanged.
local function markMethod(t, method)
  if not method then
    return t
  end
  return { tag = "Index", pos = t.pos, is_method = true, t, method }
end
|
|
|
|
-- Fold step for suffixed expressions.
-- If `t2` is a "Call" or "Invoke" node, return a new node with the same tag
-- whose first child is `t1`, followed by all of t2's children. Otherwise
-- return an `Index` node { t1, t2[1] }. The result is positioned at `t1.pos`.
local function makeIndexOrCall (t1, t2)
  local kind = t2.tag
  if kind ~= "Call" and kind ~= "Invoke" then
    return { tag = "Index", pos = t1.pos, t1, t2[1] }
  end

  local node = { tag = kind, pos = t1.pos, t1 }
  for _, child in ipairs(t2) do
    node[#node + 1] = child
  end
  return node
end
|
|
|
|
-- Post-process a short function node: always flag it `is_short`. If its first
-- child is the ":" marker (self method), drop the marker, prepend an
-- `Id "self"` parameter and flag the node `is_method`.
local function fixShortFunc(t)
  t.is_short = true
  if t[1] ~= ":" then
    return t
  end

  local params = t[2]
  table.insert(params, 1, { tag = "Id", [1] = "self" })
  table.remove(t, 1) -- remove the ":" marker so params become t[1]
  t.is_method = true
  return t
end
|
|
|
|
-- Turn a statement node into a StatExpr by appending "Expr" to its tag.
local function statToExpr(t)
  local exprTag = t.tag .. "Expr"
  t.tag = exprTag
  return t
end
|
|
|
|
-- Fix the AST structure if needed.
-- Walks `t` depth-first. Any grandchild whose `move_up_block` counter is
-- positive is removed from its parent and re-inserted into `t`, right after
-- that parent. The counter is decremented only when `t` is a "Block" or "Do"
-- node. Returns `t` (modified in place).
local function fixStructure(t)
  local i = 1
  while i <= #t do -- #t is re-read on purpose: hoisted nodes lengthen t
    local child = t[i]
    if type(child) == "table" then
      fixStructure(child)
      -- iterate backwards so removals do not shift unvisited entries
      for j = #child, 1, -1 do
        local stat = child[j]
        local pending = type(stat) == "table" and stat.move_up_block
        if pending and pending > 0 then
          table.remove(child, j)
          table.insert(t, i + 1, stat)
          if t.tag == "Block" or t.tag == "Do" then
            stat.move_up_block = pending - 1
          end
        end
      end
    end
    i = i + 1
  end
  return t
end
|
|
|
|
-- Recursively search `block` for an "end" keyword that was wrongly consumed by
-- a short anonymous function placed at the end of a statement, and restructure
-- the AST so that this "end" becomes available to the enclosing construct.
-- (It is too late to change the syntax to something easier to parse.)
--
-- block:     list of statement nodes, modified in place.
-- isRecCall: truthy when called from within this function for a nested block;
--            in that case the caller relocates the trailing statements itself
--            instead of marking them with `move_up_block` for fixStructure.
--
-- Returns `block, i` (i = index of the statement where the fix was applied)
-- when something was restructured, or nil when nothing could be found.
local function searchEndRec(block, isRecCall)
  for i, stat in ipairs(block) do
    -- Non recursive statements
    if stat.tag == "Set" or stat.tag == "Push" or stat.tag == "Return" or stat.tag == "Local" or stat.tag == "Let" or stat.tag == "Localrec" then
      local exprlist

      if stat.tag == "Set" or stat.tag == "Local" or stat.tag == "Let" or stat.tag == "Localrec" then
        exprlist = stat[#stat]
      elseif stat.tag == "Push" or stat.tag == "Return" then
        exprlist = stat
      end

      local last = exprlist[#exprlist] -- last value in ExprList

      if last == nil then
        -- Empty expression list (e.g. a bare `return`, or `local x` without
        -- values): there is no trailing expression that could have swallowed
        -- an "end". Previously this indexed nil and raised a Lua error instead
        -- of letting the parser report the missing "end".

      -- This only handles short function declarations which are not methods
      -- and which have strictly one variable name between the parentheses.
      -- Otherwise it's invalid Lua anyway, so there is nothing to recover.
      elseif last.tag == "Function" and last.is_short and not last.is_method and #last[1] == 1 then
        local p = i
        for j, fstat in ipairs(last[2]) do
          p = i + j
          table.insert(block, p, fstat) -- copy stats from func body to block

          if stat.move_up_block then -- extracted stats inherit move_up_block from statement
            fstat.move_up_block = (fstat.move_up_block or 0) + stat.move_up_block
          end

          if block.is_singlestatblock then -- if it's a single stat block, mark them to move them outside of the block
            fstat.move_up_block = (fstat.move_up_block or 0) + 1
          end
        end

        exprlist[#exprlist] = last[1] -- replace func with paren and expressions
        exprlist[#exprlist].tag = "Paren"

        if not isRecCall then -- if superfluous statements won't be moved in a next recursion, let fixStructure handle things
          for j=p+1, #block, 1 do
            block[j].move_up_block = (block[j].move_up_block or 0) + 1
          end
        end

        return block, i

      -- Things can also be recursive here (StatExpr & Function)
      elseif last.tag:match("Expr$") then
        local r = searchEndRec({ last })
        if r then
          for j=2, #r, 1 do
            table.insert(block, i+j-1, r[j]) -- move back superfluous statements from our new table to our real block
          end
          return block, i
        end
      elseif last.tag == "Function" then
        local r = searchEndRec(last[2])
        if r then
          return block, i
        end
      end

    -- Recursive statements
    elseif stat.tag:match("^If") or stat.tag:match("^While") or stat.tag:match("^Repeat") or stat.tag:match("^Do") or stat.tag:match("^Fornum") or stat.tag:match("^Forin") then
      local blocks

      if stat.tag:match("^If") or stat.tag:match("^While") or stat.tag:match("^Repeat") or stat.tag:match("^Fornum") or stat.tag:match("^Forin") then
        blocks = stat
      elseif stat.tag:match("^Do") then
        blocks = { stat }
      end

      for _, iblock in ipairs(blocks) do
        if iblock.tag == "Block" then -- blocks
          local oldLen = #iblock
          local newiBlock, newEnd = searchEndRec(iblock, true)
          if newiBlock then -- if end in the block
            local p = i
            for j=newEnd+(#iblock-oldLen)+1, #iblock, 1 do -- move all statements after the newly added statements to the parent block
              p = p + 1
              table.insert(block, p, iblock[j])
              iblock[j] = nil
            end

            if not isRecCall then -- if superfluous statements won't be moved in a next recursion, let fixStructure handle things
              for j=p+1, #block, 1 do
                block[j].move_up_block = (block[j].move_up_block or 0) + 1
              end
            end

            return block, i
          end
        end
      end
    end
  end
  return nil
end
|
|
|
|
-- Match-time capture which tries to restructure the AST to free an "end".
-- Runs fixStructure then searchEndRec on the captured block `t`; the match
-- fails (returns false) when nothing could be restructured, otherwise it
-- succeeds and yields the restructured block. `s` and `p` are unused.
local function searchEnd(s, p, t)
  local fixed = searchEndRec(fixStructure(t))
  if fixed then
    return true, fixed
  end
  return false
end
|
|
|
|
-- Body of a control structure: either a single statement (tried when `start`
-- does not match) or `start`, a block and its terminator.
--   start:      pattern opening the block (e.g. the "then"/"do" keyword)
--   startLabel: error thrown when `start` is missing
--   stopLabel:  error thrown when the block cannot be closed, even after
--               searchEnd tried to recover an "end"
--   canFollow:  optional pattern that may follow the body in place of "end";
--               it is also optionally accepted after a single statement
local function expectBlockOrSingleStatWithStartEnd(start, startLabel, stopLabel, canFollow)
  local singleStat = -start * V"SingleStatBlock"
  if canFollow then
    singleStat = singleStat * canFollow^-1
  end

  local opener = expect(start, startLabel)

  local closer = kw("end")
  if canFollow then
    closer = canFollow + closer
  end

  local closedBlock = V"Block" * closer
  local recovered = Cmt(V"Block", searchEnd) + throw(stopLabel)

  return singleStat + (opener * (closedBlock + recovered))
end
|
|
|
|
-- A block closed by "end". If that fails, searchEnd tries to recover an "end"
-- from the parsed block; if that also fails the error `label` is thrown.
-- Unfortunately this can't work *optionally* with SingleStat.
local function expectBlockWithEnd(label)
  local closedBlock = V"Block" * kw("end")
  local recovered = Cmt(V"Block", searchEnd) + throw(label)
  return closedBlock + recovered
end
|
|
|
|
-- Same as expectBlockWithEnd, but built on "BlockNoErr" and without a final
-- throw: it simply fails to match instead of raising an error.
local function maybeBlockWithEnd()
  local closedBlock = V"BlockNoErr" * kw("end")
  local recovered = Cmt(V"BlockNoErr", searchEnd)
  return closedBlock + recovered
end
|
|
|
|
-- grammar
|
|
-- grammar
-- LPeg grammar table; the initial rule is "Lua". Rules are grouped as:
-- blocks and statements, expressions, then the lexer and the operators.
local G = { V"Lua",
  Lua     = (V"Shebang"^-1 * V"Skip" * V"Block" * expect(P(-1), "Extra")) / fixStructure;
  Shebang = P"#!" * (P(1) - P"\n")^0;

  Block       = tagC("Block", (V"Stat" + -V"BlockEnd" * throw("InvalidStat"))^0 * ((V"RetStat" + V"ImplicitPushStat") * sym(";")^-1)^-1);
  Stat        = V"IfStat" + V"DoStat" + V"WhileStat" + V"RepeatStat" + V"ForStat"
              + V"LocalStat" + V"FuncStat" + V"BreakStat" + V"LabelStat" + V"GoToStat"
              + V"FuncCall" + V"Assignment"
              + V"LetStat" + V"ContinueStat" + V"PushStat"
              + sym(";");
  BlockEnd    = P"return" + "end" + "elseif" + "else" + "until" + "]" + -1 + V"ImplicitPushStat" + V"Assignment";

  SingleStatBlock = tagC("Block", V"Stat" + V"RetStat" + V"ImplicitPushStat") / function(t) t.is_singlestatblock = true return t end;
  BlockNoErr      = tagC("Block", V"Stat"^0 * ((V"RetStat" + V"ImplicitPushStat") * sym(";")^-1)^-1); -- used to check if something is a valid block without throwing an error

  IfStat     = tagC("If", V"IfPart");
  IfPart     = kw("if") * expect(V"Expr", "ExprIf") * expectBlockOrSingleStatWithStartEnd(kw("then"), "ThenIf", "EndIf", V"ElseIfPart" + V"ElsePart");
  ElseIfPart = kw("elseif") * expect(V"Expr", "ExprEIf") * expectBlockOrSingleStatWithStartEnd(kw("then"), "ThenEIf", "EndIf", V"ElseIfPart" + V"ElsePart");
  ElsePart   = kw("else") * expectBlockWithEnd("EndIf");

  DoStat     = kw("do") * expectBlockWithEnd("EndDo") / tagDo;
  WhileStat  = tagC("While", kw("while") * expect(V"Expr", "ExprWhile") * V"WhileBody");
  WhileBody  = expectBlockOrSingleStatWithStartEnd(kw("do"), "DoWhile", "EndWhile");
  RepeatStat = tagC("Repeat", kw("repeat") * V"Block" * expect(kw("until"), "UntilRep") * expect(V"Expr", "ExprRep"));

  ForStat  = kw("for") * expect(V"ForNum" + V"ForIn", "ForRange");
  ForNum   = tagC("Fornum", V"Id" * sym("=") * V"NumRange" * V"ForBody");
  NumRange = expect(V"Expr", "ExprFor1") * expect(sym(","), "CommaFor") *expect(V"Expr", "ExprFor2")
           * (sym(",") * expect(V"Expr", "ExprFor3"))^-1;
  ForIn    = tagC("Forin", V"NameList" * expect(kw("in"), "InFor") * expect(V"ExprList", "EListFor") * V"ForBody");
  ForBody  = expectBlockOrSingleStatWithStartEnd(kw("do"), "DoFor", "EndFor");

  LocalStat   = kw("local") * expect(V"LocalFunc" + V"LocalAssign", "DefLocal");
  LocalFunc   = tagC("Localrec", kw("function") * expect(V"Id", "NameLFunc") * V"FuncBody") / fixFuncStat;
  LocalAssign = tagC("Local", V"NameList" * (sym("=") * expect(V"ExprList", "EListLAssign") + Ct(Cc())));

  LetStat   = kw("let") * expect(V"LetAssign", "DefLet");
  LetAssign = tagC("Let", V"NameList" * (sym("=") * expect(V"ExprList", "EListLAssign") + Ct(Cc())));

  Assignment = tagC("Set", V"VarList" * V"BinOp"^-1 * (P"=" / "=") * V"BinOp"^-1 * V"Skip" * expect(V"ExprList", "EListAssign"));

  FuncStat   = tagC("Set", kw("function") * expect(V"FuncName", "FuncName") * V"FuncBody") / fixFuncStat;
  FuncName   = Cf(V"Id" * (sym(".") * expect(V"StrId", "NameFunc1"))^0, insertIndex)
             * (sym(":") * expect(V"StrId", "NameFunc2"))^-1 / markMethod;
  FuncBody   = tagC("Function", V"FuncParams" * expectBlockWithEnd("EndFunc"));
  FuncParams = expect(sym("("), "OParenPList") * V"ParList" * expect(sym(")"), "CParenPList");
  ParList    = V"NamedParList" * (sym(",") * expect(tagC("Dots", sym("...")), "ParList"))^-1 / addDots
             + Ct(tagC("Dots", sym("...")))
             + Ct(Cc()); -- Cc({}) generates a bug since the {} would be shared across parses

  ShortFuncDef    = tagC("Function", V"ShortFuncParams" * maybeBlockWithEnd()) / fixShortFunc;
  ShortFuncParams = (sym(":") / ":")^-1 * sym("(") * V"ParList" * sym(")");

  NamedParList = tagC("NamedParList", commaSep(V"NamedPar"));
  NamedPar     = tagC("ParPair", V"ParKey" * expect(sym("="), "EqField") * expect(V"Expr", "ExprField"))
               + V"Id";
  ParKey       = V"Id" * #("=" * -P"=");

  LabelStat    = tagC("Label", sym("::") * expect(V"Name", "Label") * expect(sym("::"), "CloseLabel"));
  GoToStat     = tagC("Goto", kw("goto") * expect(V"Name", "Goto"));
  BreakStat    = tagC("Break", kw("break"));
  ContinueStat = tagC("Continue", kw("continue"));
  RetStat      = tagC("Return", kw("return") * commaSep(V"Expr", "RetList")^-1);

  PushStat         = tagC("Push", kw("push") * commaSep(V"Expr", "RetList")^-1);
  ImplicitPushStat = tagC("Push", commaSep(V"Expr", "RetList"));

  NameList = tagC("NameList", commaSep(V"Id"));
  VarList  = tagC("VarList", commaSep(V"VarExpr"));
  ExprList = tagC("ExpList", commaSep(V"Expr", "ExprList"));

  Expr       = V"OrExpr";
  OrExpr     = chainOp(V"AndExpr", V"OrOp", "OrExpr");
  AndExpr    = chainOp(V"RelExpr", V"AndOp", "AndExpr");
  RelExpr    = chainOp(V"BOrExpr", V"RelOp", "RelExpr");
  BOrExpr    = chainOp(V"BXorExpr", V"BOrOp", "BOrExpr");
  BXorExpr   = chainOp(V"BAndExpr", V"BXorOp", "BXorExpr");
  BAndExpr   = chainOp(V"ShiftExpr", V"BAndOp", "BAndExpr");
  ShiftExpr  = chainOp(V"ConcatExpr", V"ShiftOp", "ShiftExpr");
  ConcatExpr = V"AddExpr" * (V"ConcatOp" * expect(V"ConcatExpr", "ConcatExpr"))^-1 / binaryOp;
  AddExpr    = chainOp(V"MulExpr", V"AddOp", "AddExpr");
  MulExpr    = chainOp(V"UnaryExpr", V"MulOp", "MulExpr");
  UnaryExpr  = V"UnaryOp" * expect(V"UnaryExpr", "UnaryExpr") / unaryOp
             + V"PowExpr";
  PowExpr    = V"SimpleExpr" * (V"PowOp" * expect(V"UnaryExpr", "PowExpr"))^-1 / binaryOp;

  SimpleExpr = tagC("Number", V"Number")
             + tagC("String", V"String")
             + tagC("Nil", kw("nil"))
             + tagC("Boolean", kw("false") * Cc(false))
             + tagC("Boolean", kw("true") * Cc(true))
             + tagC("Dots", sym("..."))
             + V"FuncDef"
             + V"Table"
             + V"ShortFuncDef"
             + V"SuffixedExpr"
             + V"TableCompr"
             + V"StatExpr";

  StatExpr = (V"IfStat" + V"DoStat" + V"WhileStat" + V"RepeatStat" + V"ForStat") / statToExpr;

  FuncCall = Cmt(V"SuffixedExpr", function(s, i, exp) return exp.tag == "Call" or exp.tag == "Invoke", exp end);
  VarExpr  = Cmt(V"SuffixedExpr", function(s, i, exp) return exp.tag == "Id" or exp.tag == "Index", exp end);

  SuffixedExpr = Cf(V"PrimaryExpr" * (V"Index" + V"Call")^0, makeIndexOrCall);
  PrimaryExpr  = V"SelfId" * (V"SelfCall" + V"SelfIndex")
               + V"Id"
               + tagC("Paren", sym("(") * expect(V"Expr", "ExprParen") * expect(sym(")"), "CParenExpr"));
  Index        = tagC("DotIndex", sym("." * -P".") * expect(V"StrId", "NameIndex"))
               + tagC("ArrayIndex", sym("[" * -P(S"=[")) * expect(V"Expr", "ExprIndex") * expect(sym("]"), "CBracketIndex"));
  Call         = tagC("Invoke", Cg(sym(":" * -P":") * expect(V"StrId", "NameMeth") * expect(V"FuncArgs", "MethArgs")))
               + tagC("Call", V"FuncArgs");
  SelfIndex    = tagC("DotIndex", V"StrId");
  SelfCall     = tagC("Invoke", Cg(V"StrId" * V"FuncArgs"));

  FuncDef  = (kw("function") * V"FuncBody");
  FuncArgs = sym("(") * commaSep(V"Expr", "ArgList")^-1 * expect(sym(")"), "CParenArgs")
           + V"Table"
           + tagC("String", V"String");

  Table     = tagC("Table", sym("{") * V"FieldList"^-1 * expect(sym("}"), "CBraceTable"));
  FieldList = sepBy(V"Field", V"FieldSep") * V"FieldSep"^-1;
  Field     = tagC("Pair", V"FieldKey" * expect(sym("="), "EqField") * expect(V"Expr", "ExprField"))
            + V"Expr";
  FieldKey  = sym("[" * -P(S"=[")) * expect(V"Expr", "ExprFKey") * expect(sym("]"), "CBracketFKey")
            + V"StrId" * #("=" * -P"=");
  FieldSep  = sym(",") + sym(";");

  TableCompr = tagC("TableCompr", sym("[") * V"Block" * expect(sym("]"), "CBracketTableCompr"));

  SelfId = tagC("Id", sym"@" / "self");
  Id     = tagC("Id", V"Name") + V"SelfId";
  StrId  = tagC("String", V"Name");

  -- lexer
  Skip     = (V"Space" + V"Comment")^0;
  Space    = space^1;
  Comment  = P"--" * V"LongStr" / function () return end
           + P"--" * (P(1) - P"\n")^0;

  Name      = token(-V"Reserved" * C(V"Ident"));
  Reserved  = V"Keywords" * -V"IdRest";
  Keywords  = P"and" + "break" + "do" + "elseif" + "else" + "end"
            + "false" + "for" + "function" + "goto" + "if" + "in"
            + "local" + "nil" + "not" + "or" + "repeat" + "return"
            + "then" + "true" + "until" + "while";
  Ident     = V"IdStart" * V"IdRest"^0;
  IdStart   = alpha + P"_";
  IdRest    = alnum + P"_";

  Number   = token(C(V"Hex" + V"Float" + V"Int"));
  Hex      = (P"0x" + "0X") * ((xdigit^0 * V"DeciHex") + (expect(xdigit^1, "DigitHex") * V"DeciHex"^-1)) * V"ExpoHex"^-1;
  Float    = V"Decimal" * V"Expo"^-1
           + V"Int" * V"Expo";
  Decimal  = digit^1 * "." * digit^0
           + P"." * -P"." * expect(digit^1, "DigitDeci");
  DeciHex  = P"." * xdigit^0;
  Expo     = S"eE" * S"+-"^-1 * expect(digit^1, "DigitExpo");
  -- The binary exponent of a hexadecimal float is written in DECIMAL digits
  -- (e.g. 0xA23p-4). Accepting hex digits here let input such as 0x1pF
  -- through, which Lua itself rejects as a malformed number.
  ExpoHex  = S"pP" * S"+-"^-1 * expect(digit^1, "DigitExpo");
  Int      = digit^1;

  String    = token(V"ShortStr" + V"LongStr");
  ShortStr  = P'"' * Cs((V"EscSeq" + (P(1)-S'"\n'))^0) * expect(P'"', "Quote")
            + P"'" * Cs((V"EscSeq" + (P(1)-S"'\n"))^0) * expect(P"'", "Quote");

  EscSeq = P"\\" / ""  -- remove backslash
         * ( P"a" / "\a"
           + P"b" / "\b"
           + P"f" / "\f"
           + P"n" / "\n"
           + P"r" / "\r"
           + P"t" / "\t"
           + P"v" / "\v"

           + P"\n" / "\n"
           + P"\r" / "\n"

           + P"\\" / "\\"
           + P"\"" / "\""
           + P"\'" / "\'"

           + P"z" * space^0  / ""

           + digit * digit^-2 / tonumber / string.char
           + P"x" * expect(C(xdigit * xdigit), "HexEsc") * Cc(16) / tonumber / string.char
           + P"u" * expect("{", "OBraceUEsc")
                  * expect(C(xdigit^1), "DigitUEsc") * Cc(16)
                  * expect("}", "CBraceUEsc")
                  / tonumber
                  / (utf8 and utf8.char or string.char)  -- true max is \u{10FFFF}
                                                         -- utf8.char needs Lua 5.3
                                                         -- string.char works only until \u{FF}

           + throw("EscSeq")
           );

  LongStr  = V"Open" * C((P(1) - V"CloseEq")^0) * expect(V"Close", "CloseLStr") / function (s, eqs) return s end;
  Open     = "[" * Cg(V"Equals", "openEq") * "[" * P"\n"^-1;
  Close    = "]" * C(V"Equals") * "]";
  Equals   = P"="^0;
  CloseEq  = Cmt(V"Close" * Cb("openEq"), function (s, i, closeEq, openEq) return #openEq == #closeEq end);

  OrOp      = kw("or")   / "or";
  AndOp     = kw("and")  / "and";
  RelOp     = sym("~=")  / "ne"
            + sym("==")  / "eq"
            + sym("<=")  / "le"
            + sym(">=")  / "ge"
            + sym("<")   / "lt"
            + sym(">")   / "gt";
  BOrOp     = sym("|")   / "bor";
  BXorOp    = sym("~" * -P"=") / "bxor";
  BAndOp    = sym("&")   / "band";
  ShiftOp   = sym("<<")  / "shl"
            + sym(">>")  / "shr";
  ConcatOp  = sym("..")  / "concat";
  AddOp     = sym("+")   / "add"
            + sym("-")   / "sub";
  MulOp     = sym("*")   / "mul"
            + sym("//")  / "idiv"
            + sym("/")   / "div"
            + sym("%")   / "mod";
  UnaryOp   = kw("not")  / "not"
            + sym("-")   / "unm"
            + sym("#")   / "len"
            + sym("~")   / "bnot";
  PowOp     = sym("^")   / "pow";
  BinOp     = V"OrOp" + V"AndOp" + V"BOrOp" + V"BXorOp" + V"BAndOp" + V"ShiftOp" + V"ConcatOp" + V"AddOp" + V"MulOp" + V"PowOp";
}
|
|
|
|
local parser = {}
|
|
|
|
local validator = require("lib.lua-parser.validator")
|
|
local validate = validator.validate
|
|
local syntaxerror = validator.syntaxerror
|
|
|
|
-- Parse `subject` (source text) with the grammar G.
-- `filename` is stored alongside the subject in the error-info table handed to
-- the grammar, the validator and the error formatter.
-- On a successful match, returns whatever validate(ast, errorinfo) returns.
-- On failure, returns the falsy match result followed by the result of
-- syntaxerror(...) built from the failure position and the label's message.
function parser.parse (subject, filename)
  local errorinfo = { subject = subject, filename = filename }
  lpeg.setmaxstack(1000)

  local ast, label, sfail = lpeg.match(G, subject, nil, errorinfo)
  if ast then
    return validate(ast, errorinfo)
  end

  -- sfail is the unconsumed suffix of the subject at the point of failure
  local errpos = #subject - #sfail + 1
  local errmsg = labels[label][2]
  return ast, syntaxerror(errorinfo, errpos, errmsg)
end
|
|
|
|
return parser
|