--[[
This module implements a parser for Lua 5.3 with LPeg,
and generates an Abstract Syntax Tree that is similar to the one generated by Metalua.
For more information about Metalua, please, visit:
https://github.com/fab13n/metalua-parser

In the description below, "str" stands for a Lua string value.

block: { stat* }

stat:
    `Do{ stat* }
  | `Set{ {lhs+} (opid? = opid?)? {expr+} }   -- lhs1, lhs2... op=op e1, e2...
  | `While{ expr block }                      -- while e do b end
  | `Repeat{ block expr }                     -- repeat b until e
  | `If{ (lexpr block)+ block? }              -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
  | `Fornum{ ident expr expr expr? block }    -- for ident = e, e[, e] do b end
  | `Forin{ {ident+} {expr+} block }          -- for i1, i2... in e1, e2... do b end
  | `Local{ {ident+} {expr+}? }               -- local i1, i2... = e1, e2...
  | `Let{ {ident+} {expr+}? }                 -- let i1, i2... = e1, e2...
  | `Localrec{ {ident} {expr} }               -- only used for 'local function'
  | `Goto{ str }                              -- goto str
  | `Label{ str }                             -- ::str::
  | `Return{ expr* }                          -- return e1, e2...
  | `Break                                    -- break
  | `Push{ expr* }                            -- push
  | `Continue                                 -- continue
  | apply

expr:
    `Nil
  | `Dots
  | `Boolean{ true | false }
  | `Number{ str }                            -- we don't use convert to number to avoid losing precision when tostring()-ing it later
  | `String{ str }
  | `Function{ { ( `ParPair{ Id expr } | `Id{ str } )* `Dots? } block }
  | `Table{ ( `Pair{ expr expr } | expr )* }
  | `Op{ opid expr expr? }
  | `Paren{ expr }                            -- significant to cut multiple values returns
  | `TableCompr{ block }
  | `MethodStub{ expr expr }
  | `SafeMethodStub{ expr expr }
  | `SafeIndex{ expr expr }
  | statexpr
  | apply
  | lhs

lexpr:
    `LetExpr{ {ident+} {expr+}? }
  | every node from expr

statexpr:
    `DoExpr{ stat* }
  | `WhileExpr{ expr block }                  -- while e do b end
  | `RepeatExpr{ block expr }                 -- repeat b until e
  | `IfExpr{ (expr block)+ block? }           -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
  | `FornumExpr{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
  | `ForinExpr{ {ident+} {expr+} block }      -- for i1, i2... in e1, e2... do b end

apply:
    `Call{ expr expr* }
  | `SafeCall{ expr expr* }

lhs: `Id{ str } | `Index{ expr expr } | `DestructuringId{ Id | Pair+ }

opid:  -- includes additional operators from Lua 5.3 and all relational operators
    'add'  | 'sub' | 'mul'  | 'div'
  | 'idiv' | 'mod' | 'pow'  | 'concat'
  | 'band' | 'bor' | 'bxor' | 'shl' | 'shr'
  | 'eq'   | 'ne'  | 'lt'   | 'gt'  | 'le'   | 'ge'
  | 'and'  | 'or'  | 'unm'  | 'len' | 'bnot' | 'not'
]]

local lpeg = require "lpeglabel"

lpeg.locale(lpeg)

local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
local Cf, Cg, Cmt, Cp, Cs, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
local Rec, T = lpeg.Rec, lpeg.T

local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
local xdigit = lpeg.xdigit
local space = lpeg.space

-- error message auxiliary functions

-- List of { label name, error message } pairs.
-- The position of an entry in this list is the numeric label thrown by the
-- grammar (see throw) and is used by parser.parse to get the message back,
-- so the order only has to stay consistent within this file.
local labels = {
  { "ErrExtra", "unexpected character(s), expected EOF" },
  { "ErrInvalidStat", "unexpected token, invalid start of statement" },
  { "ErrEndIf", "expected 'end' to close the if statement" },
  { "ErrExprIf", "expected a condition after 'if'" },
  { "ErrThenIf", "expected 'then' after the condition" },
  { "ErrExprEIf", "expected a condition after 'elseif'" },
  { "ErrThenEIf", "expected 'then' after the condition" },
  { "ErrEndDo", "expected 'end' to close the do block" },
  { "ErrExprWhile", "expected a condition after 'while'" },
  { "ErrDoWhile", "expected 'do' after the condition" },
  { "ErrEndWhile", "expected 'end' to close the while loop" },
  { "ErrUntilRep", "expected 'until' at the end of the repeat loop" },
  { "ErrExprRep", "expected a conditions after 'until'" },
  { "ErrForRange", "expected a numeric or generic range after 'for'" },
  { "ErrEndFor", "expected 'end' to close the for loop" },
  { "ErrExprFor1", "expected a starting expression for the numeric range" },
  { "ErrCommaFor", "expected ',' to split the start and end of the range" },
  { "ErrExprFor2", "expected an ending expression for the numeric range" },
  { "ErrExprFor3", "expected a step expression for the numeric range after ','" },
  { "ErrInFor", "expected '=' or 'in' after the variable(s)" },
  { "ErrEListFor", "expected one or more expressions after 'in'" },
  { "ErrDoFor", "expected 'do' after the range of the for loop" },
  { "ErrDefLocal", "expected a function definition or assignment after local" },
  { "ErrDefLet", "expected a function definition or assignment after let" },
  { "ErrNameLFunc", "expected a function name after 'function'" },
  { "ErrEListLAssign", "expected one or more expressions after '='" },
  { "ErrEListAssign", "expected one or more expressions after '='" },
  { "ErrFuncName", "expected a function name after 'function'" },
  { "ErrNameFunc1", "expected a function name after '.'" },
  { "ErrNameFunc2", "expected a method name after ':'" },
  { "ErrOParenPList", "expected '(' for the parameter list" },
  { "ErrCParenPList", "expected ')' to close the parameter list" },
  { "ErrEndFunc", "expected 'end' to close the function body" },
  { "ErrParList", "expected a variable name or '...' after ','" },
  { "ErrLabel", "expected a label name after '::'" },
  { "ErrCloseLabel", "expected '::' after the label" },
  { "ErrGoto", "expected a label after 'goto'" },
  { "ErrRetList", "expected an expression after ',' in the return statement" },
  { "ErrVarList", "expected a variable name after ','" },
  { "ErrExprList", "expected an expression after ','" },
  { "ErrOrExpr", "expected an expression after 'or'" },
  { "ErrAndExpr", "expected an expression after 'and'" },
  { "ErrRelExpr", "expected an expression after the relational operator" },
  { "ErrBOrExpr", "expected an expression after '|'" },
  { "ErrBXorExpr", "expected an expression after '~'" },
  { "ErrBAndExpr", "expected an expression after '&'" },
  { "ErrShiftExpr", "expected an expression after the bit shift" },
  { "ErrConcatExpr", "expected an expression after '..'" },
  { "ErrAddExpr", "expected an expression after the additive operator" },
  { "ErrMulExpr", "expected an expression after the multiplicative operator" },
  { "ErrUnaryExpr", "expected an expression after the unary operator" },
  { "ErrPowExpr", "expected an expression after '^'" },
  { "ErrExprParen", "expected an expression after '('" },
  { "ErrCParenExpr", "expected ')' to close the expression" },
  { "ErrNameIndex", "expected a field name after '.'" },
  { "ErrExprIndex", "expected an expression after '['" },
  { "ErrCBracketIndex", "expected ']' to close the indexing expression" },
  { "ErrNameMeth", "expected a method name after ':'" },
  { "ErrMethArgs", "expected some arguments for the method call (or '()')" },
  { "ErrArgList", "expected an expression after ',' in the argument list" },
  { "ErrCParenArgs", "expected ')' to close the argument list" },
  { "ErrCBraceTable", "expected '}' to close the table constructor" },
  { "ErrEqField", "expected '=' after the table key" },
  { "ErrExprField", "expected an expression after '='" },
  { "ErrExprFKey", "expected an expression after '[' for the table key" },
  { "ErrCBracketFKey", "expected ']' to close the table key" },
  { "ErrCBraceDestructuring", "expected '}' to close the destructuring variable list" },
  { "ErrDestructuringEqField", "expected '=' after the table key in destructuring variable list" },
  { "ErrDestructuringExprField", "expected an identifier after '=' in destructuring variable list" },
  { "ErrCBracketTableCompr", "expected ']' to close the table comprehension" },
  { "ErrDigitHex", "expected one or more hexadecimal digits after '0x'" },
  { "ErrDigitDeci", "expected one or more digits after the decimal point" },
  { "ErrDigitExpo", "expected one or more digits for the exponent" },
  { "ErrQuote", "unclosed string" },
  { "ErrHexEsc", "expected exactly two hexadecimal digits after '\\x'" },
  { "ErrOBraceUEsc", "expected '{' after '\\u'" },
  { "ErrDigitUEsc", "expected one or more hexadecimal digits for the UTF-8 code point" },
  { "ErrCBraceUEsc", "expected '}' after the code point" },
  { "ErrEscSeq", "invalid escape sequence" },
  { "ErrCloseLStr", "unclosed long string" },
}

-- Returns a pattern throwing the label registered as "Err" .. label.
-- The thrown value is the index of the entry in `labels`.
-- Raises a Lua error when no such entry exists.
local function throw(label)
  label = "Err" .. label
  for i, labelinfo in ipairs(labels) do
    if labelinfo[1] == label then
      return T(i)
    end
  end

  error("Label not found: " .. label)
end

-- Matches patt, or throws the given label when patt does not match.
local function expect (patt, label)
  return patt + throw(label)
end

-- regular combinators and auxiliary functions

-- patt followed by whatever the Skip rule matches.
local function token (patt)
  return patt * V"Skip"
end

-- Literal string as a token.
local function sym (str)
  return token(P(str))
end

-- Literal string that is not followed by an identifier character, as a token.
local function kw (str)
  return token(P(str) * -V"IdRest")
end

-- Table capture holding the match position in field "pos", the given tag in
-- field "tag", and the captures of patt in the array part.
local function tagC (tag, patt)
  return Ct(Cg(Cp(), "pos") * Cg(Cc(tag), "tag") * patt)
end

-- Builds an `Op node for the unary operator op applied to e.
local function unaryOp (op, e)
  return { tag = "Op", pos = e.pos, [1] = op, [2] = e }
end

-- Builds an `Op node for e1 op e2; returns e1 untouched when there is no operator.
local function binaryOp (e1, op, e2)
  if not op then
    return e1
  else
    return { tag = "Op", pos = e1.pos, [1] = op, [2] = e1, [3] = e2 }
  end
end

-- patt followed by any number of (sep patt) groups.
-- When a label is given, a missing patt after sep throws that label.
local function sepBy (patt, sep, label)
  if label then
    return patt * Cg(sep * expect(patt, label))^0
  else
    return patt * Cg(sep * patt)^0
  end
end

-- sepBy whose captures are folded, left to right, with binaryOp.
local function chainOp (patt, sep, label)
  return Cf(sepBy(patt, sep, label), binaryOp)
end

-- sepBy using "," as the separator.
local function commaSep (patt, label)
  return sepBy(patt, sym(","), label)
end

-- Retags a block as a `Do node.
local function tagDo (block)
  block.tag = "Do"
  return block
end

-- Post-processes a function statement: when the name node is flagged
-- is_method, an explicit "self" Id is inserted as first parameter; the name
-- and the function are then each wrapped in a one-element list.
local function fixFuncStat (func)
  if func[1].is_method then table.insert(func[2][1], 1, { tag = "Id", [1] = "self" }) end
  func[1] = {func[1]}
  func[2] = {func[2]}
  return func
end

-- Appends the dots node, if there is one, to the parameter list.
local function addDots (params, dots)
  if dots then table.insert(params, dots) end
  return params
end

-- Builds an `Index node: t[index].
local function insertIndex (t, index)
  return { tag = "Index", pos = t.pos, [1] = t, [2] = index }
end

-- When a method name is given, builds an `Index node flagged is_method;
-- otherwise returns t untouched.
local function markMethod (t, method)
  if method then
    return { tag = "Index", pos = t.pos, is_method = true, [1] = t, [2] = method }
  end
  return t
end

-- Fold step for suffixed expressions: combines the expression built so far
-- (t1) with the next suffix (t2) into a call, method stub or index node.
-- Raises a Lua error for any other suffix tag.
local function makeSuffixedExpr (t1, t2)
  if t2.tag == "Call" or t2.tag == "SafeCall" then
    local t = { tag = t2.tag, pos = t1.pos, [1] = t1 }
    for k, v in ipairs(t2) do
      table.insert(t, v)
    end
    return t
  elseif t2.tag == "MethodStub" or t2.tag == "SafeMethodStub" then
    return { tag = t2.tag, pos = t1.pos, [1] = t1, [2] = t2[1] }
  elseif t2.tag == "SafeDotIndex" or t2.tag == "SafeArrayIndex" then
    return { tag = "SafeIndex", pos = t1.pos, [1] = t1, [2] = t2[1] }
  elseif t2.tag == "DotIndex" or t2.tag == "ArrayIndex" then
    return { tag = "Index", pos = t1.pos, [1] = t1, [2] = t2[1] }
  else
    error("unexpected tag in suffixed expression")
  end
end

-- Post-processes a short function definition: a leading ":" capture makes it
-- a method (explicit "self" parameter, is_method flag); the node is always
-- flagged is_short.
local function fixShortFunc (t)
  if t[1] == ":" then -- self method
    table.insert(t[2], 1, { tag = "Id", "self" })
    table.remove(t, 1)
    t.is_method = true
  end
  t.is_short = true
  return t
end

-- Turns a statement node into its statement-expression counterpart by
-- appending "Expr" to its tag.
local function statToExpr (t) -- tag a StatExpr
  t.tag = t.tag .. "Expr"
  return t
end

-- Fix the AST structure if needed: every child entry carrying a positive
-- move_up_block counter is removed from its parent and re-inserted in t,
-- right after that parent. The counter is decremented only when t itself is
-- a `Block or `Do node. Works in place and returns t.
local function fixStructure (t)
  local i = 1
  while i <= #t do
    if type(t[i]) == "table" then
      fixStructure(t[i])
      -- backward iteration: entries may be removed while iterating
      for j=#t[i], 1, -1 do
        local stat = t[i][j]
        if type(stat) == "table" and stat.move_up_block and stat.move_up_block > 0 then
          table.remove(t[i], j)
          table.insert(t, i+1, stat)
          if t.tag == "Block" or t.tag == "Do" then
            stat.move_up_block = stat.move_up_block - 1
          end
        end
      end
    end
    i = i + 1
  end
  return t
end

-- Recursively search a potential "end" keyword wrongly consumed by a short
-- anonymous function at the end of a statement (yeah, too late to change the
-- syntax to something easier to parse).
-- Returns the (modified) block and the index of the statement where the
-- restructuring happened, or nil when nothing could be restructured.
-- isRecCall is true when called on a nested block by this function itself.
local function searchEndRec (block, isRecCall)
  for i, stat in ipairs(block) do
    -- Non recursive statements
    if stat.tag == "Set" or stat.tag == "Push" or stat.tag == "Return" or stat.tag == "Local" or stat.tag == "Let" or stat.tag == "Localrec" then
      local exprlist

      if stat.tag == "Set" or stat.tag == "Local" or stat.tag == "Let" or stat.tag == "Localrec" then
        exprlist = stat[#stat]
      elseif stat.tag == "Push" or stat.tag == "Return" then
        exprlist = stat
      end

      local last = exprlist[#exprlist] -- last value in ExprList

      if last == nil then
        -- The statement has no expression at all (bare `return` / `push`, or
        -- `local x` / `let x` without values): there is nothing to
        -- restructure here, keep looking in the following statements.

      -- This only handles short function declarations which are not methods and with strictly one variable name between the parentheses.
      -- Otherwise it's invalid Lua anyway, so not my problem.
      elseif last.tag == "Function" and last.is_short and not last.is_method and #last[1] == 1 then
        local p = i
        for j, fstat in ipairs(last[2]) do
          p = i + j
          table.insert(block, p, fstat) -- copy stats from func body to block
          if stat.move_up_block then -- extracted stats inherit move_up_block from statement
            fstat.move_up_block = (fstat.move_up_block or 0) + stat.move_up_block
          end
          if block.is_singlestatblock then -- if it's a single stat block, mark them to move them outside of the block
            fstat.move_up_block = (fstat.move_up_block or 0) + 1
          end
        end

        exprlist[#exprlist] = last[1] -- replace func with paren and expressions
        exprlist[#exprlist].tag = "Paren"

        if not isRecCall then -- if superfluous statements won't be moved in a next recursion, let fixStructure handle things
          for j=p+1, #block, 1 do
            block[j].move_up_block = (block[j].move_up_block or 0) + 1
          end
        end

        return block, i

      -- I lied, stuff can also be recursive here (StatExpr & Function)
      elseif last.tag:match("Expr$") then
        local r = searchEndRec({ last })
        if r then
          for j=2, #r, 1 do
            table.insert(block, i+j-1, r[j]) -- move back superfluous statements from our new table to our real block
          end
          return block, i
        end
      elseif last.tag == "Function" then
        local r = searchEndRec(last[2])
        if r then
          return block, i
        end
      end

    -- Recursive statements
    elseif stat.tag:match("^If") or stat.tag:match("^While") or stat.tag:match("^Repeat") or stat.tag:match("^Do") or stat.tag:match("^Fornum") or stat.tag:match("^Forin") then
      local blocks

      if stat.tag:match("^If") or stat.tag:match("^While") or stat.tag:match("^Repeat") or stat.tag:match("^Fornum") or stat.tag:match("^Forin") then
        blocks = stat
      elseif stat.tag:match("^Do") then
        blocks = { stat }
      end

      for _, iblock in ipairs(blocks) do
        if iblock.tag == "Block" then -- blocks
          local oldLen = #iblock
          local newiBlock, newEnd = searchEndRec(iblock, true)
          if newiBlock then -- if end in the block
            local p = i
            for j=newEnd+(#iblock-oldLen)+1, #iblock, 1 do -- move all statements after the newly added statements to the parent block
              p = p + 1
              table.insert(block, p, iblock[j])
              iblock[j] = nil
            end

            if not isRecCall then -- if superfluous statements won't be moved in a next recursion, let fixStructure handle things
              for j=p+1, #block, 1 do
                block[j].move_up_block = (block[j].move_up_block or 0) + 1
              end
            end

            return block, i
          end
        end
      end
    end
  end

  return nil
end

-- Match-time capture which tries to restructure the AST to free an "end" for us.
-- Fails the match (returns false) when searchEndRec finds nothing to restructure.
local function searchEnd (s, p, t)
  local r = searchEndRec(fixStructure(t))
  if not r then
    return false
  end
  return true, r
end

-- Body of a construct introduced by `start` (e.g. "then", "do"):
-- tries a SingleStatBlock when start doesn't match, otherwise expects start
-- followed by a block closed by "end" (or by canFollow, when given).
-- startLabel is thrown when start is missing, stopLabel when the block cannot
-- be closed.
local function expectBlockOrSingleStatWithStartEnd (start, startLabel, stopLabel, canFollow)
  if canFollow then
    return (-start * V"SingleStatBlock" * canFollow^-1)
         + (expect(start, startLabel) * ((V"Block" * (canFollow + kw("end")))
                                      + (Cmt(V"Block", searchEnd) + throw(stopLabel))))
  else
    return (-start * V"SingleStatBlock")
         + (expect(start, startLabel) * ((V"Block" * kw("end"))
                                      + (Cmt(V"Block", searchEnd) + throw(stopLabel))))
  end
end

-- Block closed by "end"; throws label otherwise.
-- Can't work *optionally* with SingleStat unfortunately.
local function expectBlockWithEnd (label)
  return (V"Block" * kw("end"))
       + (Cmt(V"Block", searchEnd) + throw(label))
end

-- Same as expectBlockWithEnd but doesn't throw if it doesn't match.
local function maybeBlockWithEnd ()
  return (V"BlockNoErr" * kw("end"))
       + Cmt(V"BlockNoErr", searchEnd)
end

-- Named context stacks manipulated at match time by push / pop / when.
-- They live at module level, so parser.parse resets them before each parse.
local stacks = {
  lexpr = {}
}

-- Pattern matching the empty string and pushing an entry on stack f.
local function push (f)
  return Cmt(P"", function()
    table.insert(stacks[f], true)
    return true
  end)
end

-- Pattern matching the empty string and popping an entry from stack f.
local function pop (f)
  return Cmt(P"", function()
    table.remove(stacks[f])
    return true
  end)
end

-- Pattern matching the empty string only while stack f is not empty.
local function when (f)
  return Cmt(P"", function()
    return #stacks[f] > 0
  end)
end

-- Runs patt with an entry pushed on stack f.
-- patt *must* succeed (or throw an error) to preserve stack integrity.
local function set (f, patt)
  return push(f) * patt * pop(f)
end

-- grammar
local G = { V"Lua",
  Lua      = (V"Shebang"^-1 * V"Skip" * V"Block" * expect(P(-1), "Extra")) / fixStructure;
  Shebang  = P"#!" * (P(1) - P"\n")^0;

  Block       = tagC("Block", (V"Stat" + -V"BlockEnd" * throw("InvalidStat"))^0 * ((V"RetStat" + V"ImplicitPushStat") * sym(";")^-1)^-1);
  Stat        = V"IfStat" + V"DoStat" + V"WhileStat" + V"RepeatStat" + V"ForStat"
              + V"LocalStat" + V"FuncStat" + V"BreakStat" + V"LabelStat" + V"GoToStat"
              + V"LetStat"
              + V"FuncCall" + V"Assignment"
              + V"ContinueStat" + V"PushStat"
              + sym(";");
  BlockEnd    = P"return" + "end" + "elseif" + "else" + "until" + "]" + -1 + V"ImplicitPushStat" + V"Assignment";

  SingleStatBlock = tagC("Block", V"Stat" + V"RetStat" + V"ImplicitPushStat") / function(t) t.is_singlestatblock = true return t end;
  BlockNoErr  = tagC("Block", V"Stat"^0 * ((V"RetStat" + V"ImplicitPushStat") * sym(";")^-1)^-1); -- used to check if something is a valid block without throwing an error

  IfStat      = tagC("If", V"IfPart");
  IfPart      = kw("if") * set("lexpr", expect(V"Expr", "ExprIf")) * expectBlockOrSingleStatWithStartEnd(kw("then"), "ThenIf", "EndIf", V"ElseIfPart" + V"ElsePart");
  ElseIfPart  = kw("elseif") * set("lexpr", expect(V"Expr", "ExprEIf")) * expectBlockOrSingleStatWithStartEnd(kw("then"), "ThenEIf", "EndIf", V"ElseIfPart" + V"ElsePart");
  ElsePart    = kw("else") * expectBlockWithEnd("EndIf");

  DoStat      = kw("do") * expectBlockWithEnd("EndDo") / tagDo;
  WhileStat   = tagC("While", kw("while") * set("lexpr", expect(V"Expr", "ExprWhile")) * V"WhileBody");
  WhileBody   = expectBlockOrSingleStatWithStartEnd(kw("do"), "DoWhile", "EndWhile");
  RepeatStat  = tagC("Repeat", kw("repeat") * V"Block" * expect(kw("until"), "UntilRep") * expect(V"Expr", "ExprRep"));

  ForStat   = kw("for") * expect(V"ForNum" + V"ForIn", "ForRange");
  ForNum    = tagC("Fornum", V"Id" * sym("=") * V"NumRange" * V"ForBody");
  NumRange  = expect(V"Expr", "ExprFor1") * expect(sym(","), "CommaFor") *expect(V"Expr", "ExprFor2")
            * (sym(",") * expect(V"Expr", "ExprFor3"))^-1;
  ForIn     = tagC("Forin", V"DestructuringNameList" * expect(kw("in"), "InFor") * expect(V"ExprList", "EListFor") * V"ForBody");
  ForBody   = expectBlockOrSingleStatWithStartEnd(kw("do"), "DoFor", "EndFor");

  LocalStat    = kw("local") * expect(V"LocalFunc" + V"LocalAssign", "DefLocal");
  LocalFunc    = tagC("Localrec", kw("function") * expect(V"Id", "NameLFunc") * V"FuncBody") / fixFuncStat;
  LocalAssign  = tagC("Local", V"NameList" * (sym("=") * expect(V"ExprList", "EListLAssign") + Ct(Cc())))
               + tagC("Local", V"DestructuringNameList" * sym("=") * expect(V"ExprList", "EListLAssign"));

  LetStat      = kw("let") * expect(V"LetAssign", "DefLet");
  LetAssign    = tagC("Let", V"NameList" * (sym("=") * expect(V"ExprList", "EListLAssign") + Ct(Cc())))
               + tagC("Let", V"DestructuringNameList" * sym("=") * expect(V"ExprList", "EListLAssign"));

  Assignment   = tagC("Set", (V"VarList" + V"DestructuringNameList") * V"BinOp"^-1 * (P"=" / "=") * ((V"BinOp" - P"-") + #(P"-" * V"Space") * V"BinOp")^-1 * V"Skip" * expect(V"ExprList", "EListAssign"));

  FuncStat    = tagC("Set", kw("function") * expect(V"FuncName", "FuncName") * V"FuncBody") / fixFuncStat;
  FuncName    = Cf(V"Id" * (sym(".") * expect(V"StrId", "NameFunc1"))^0, insertIndex)
              * (sym(":") * expect(V"StrId", "NameFunc2"))^-1 / markMethod;
  FuncBody    = tagC("Function", V"FuncParams" * expectBlockWithEnd("EndFunc"));
  FuncParams  = expect(sym("("), "OParenPList") * V"ParList" * expect(sym(")"), "CParenPList");
  ParList     = V"NamedParList" * (sym(",") * expect(tagC("Dots", sym("...")), "ParList"))^-1 / addDots
              + Ct(tagC("Dots", sym("...")))
              + Ct(Cc()); -- Cc({}) generates a bug since the {} would be shared across parses

  ShortFuncDef     = tagC("Function", V"ShortFuncParams" * maybeBlockWithEnd()) / fixShortFunc;
  ShortFuncParams  = (sym(":") / ":")^-1 * sym("(") * V"ParList" * sym(")");

  NamedParList  = tagC("NamedParList", commaSep(V"NamedPar"));
  NamedPar      = tagC("ParPair", V"ParKey" * expect(sym("="), "EqField") * expect(V"Expr", "ExprField"))
                + V"Id";
  ParKey        = V"Id" * #("=" * -P"=");

  LabelStat  = tagC("Label", sym("::") * expect(V"Name", "Label") * expect(sym("::"), "CloseLabel"));
  GoToStat   = tagC("Goto", kw("goto") * expect(V"Name", "Goto"));
  BreakStat  = tagC("Break", kw("break"));
  ContinueStat = tagC("Continue", kw("continue"));
  RetStat    = tagC("Return", kw("return") * commaSep(V"Expr", "RetList")^-1);

  PushStat   = tagC("Push", kw("push") * commaSep(V"Expr", "RetList")^-1);
  ImplicitPushStat = tagC("Push", commaSep(V"Expr", "RetList"));

  NameList  = tagC("NameList", commaSep(V"Id"));
  DestructuringNameList = tagC("NameList", commaSep(V"DestructuringId")),
  VarList   = tagC("VarList", commaSep(V"VarExpr"));
  ExprList  = tagC("ExpList", commaSep(V"Expr", "ExprList"));

  DestructuringId = tagC("DestructuringId", sym("{") * V"DestructuringIdFieldList" * expect(sym("}"), "CBraceDestructuring")) + V"Id",
  DestructuringIdFieldList = sepBy(V"DestructuringIdField", V"FieldSep") * V"FieldSep"^-1;
  DestructuringIdField = tagC("Pair", V"FieldKey" * expect(sym("="), "DestructuringEqField") * expect(V"Id", "DestructuringExprField")) + V"Id";

  Expr        = V"OrExpr";
  OrExpr      = chainOp(V"AndExpr", V"OrOp", "OrExpr");
  AndExpr     = chainOp(V"RelExpr", V"AndOp", "AndExpr");
  RelExpr     = chainOp(V"BOrExpr", V"RelOp", "RelExpr");
  BOrExpr     = chainOp(V"BXorExpr", V"BOrOp", "BOrExpr");
  BXorExpr    = chainOp(V"BAndExpr", V"BXorOp", "BXorExpr");
  BAndExpr    = chainOp(V"ShiftExpr", V"BAndOp", "BAndExpr");
  ShiftExpr   = chainOp(V"ConcatExpr", V"ShiftOp", "ShiftExpr");
  ConcatExpr  = V"AddExpr" * (V"ConcatOp" * expect(V"ConcatExpr", "ConcatExpr"))^-1 / binaryOp;
  AddExpr     = chainOp(V"MulExpr", V"AddOp", "AddExpr");
  MulExpr     = chainOp(V"UnaryExpr", V"MulOp", "MulExpr");
  UnaryExpr   = V"UnaryOp" * expect(V"UnaryExpr", "UnaryExpr") / unaryOp
              + V"PowExpr";
  PowExpr     = V"SimpleExpr" * (V"PowOp" * expect(V"UnaryExpr", "PowExpr"))^-1 / binaryOp;

  SimpleExpr = tagC("Number", V"Number")
             + tagC("Nil", kw("nil"))
             + tagC("Boolean", kw("false") * Cc(false))
             + tagC("Boolean", kw("true") * Cc(true))
             + tagC("Dots", sym("..."))
             + V"FuncDef"
             + (when("lexpr") * tagC("LetExpr", V"DestructuringNameList" * sym("=") * -sym("=") * expect(V"ExprList", "EListLAssign")))
             + V"ShortFuncDef"
             + V"SuffixedExpr"
             + V"StatExpr";

  StatExpr = (V"IfStat" + V"DoStat" + V"WhileStat" + V"RepeatStat" + V"ForStat") / statToExpr;

  FuncCall  = Cmt(V"SuffixedExpr", function(s, i, exp) return exp.tag == "Call" or exp.tag == "SafeCall", exp end);
  VarExpr   = Cmt(V"SuffixedExpr", function(s, i, exp) return exp.tag == "Id" or exp.tag == "Index", exp end);

  SuffixedExpr  = Cf(V"PrimaryExpr" * (V"Index" + V"MethodStub" + V"Call")^0
                + V"NoCallPrimaryExpr" * -V"Call" * (V"Index" + V"MethodStub" + V"Call")^0
                + V"NoCallPrimaryExpr", makeSuffixedExpr);
  PrimaryExpr   = V"SelfId" * (V"SelfCall" + V"SelfIndex")
                + V"Id"
                + tagC("Paren", sym("(") * expect(V"Expr", "ExprParen") * expect(sym(")"), "CParenExpr"));
  NoCallPrimaryExpr = tagC("String", V"String") + V"Table" + V"TableCompr";
  Index         = tagC("DotIndex", sym("." * -P".") * expect(V"StrId", "NameIndex"))
                + tagC("ArrayIndex", sym("[" * -P(S"=[")) * expect(V"Expr", "ExprIndex") * expect(sym("]"), "CBracketIndex"))
                + tagC("SafeDotIndex", sym("?." * -P".") * expect(V"StrId", "NameIndex"))
                + tagC("SafeArrayIndex", sym("?[" * -P(S"=[")) * expect(V"Expr", "ExprIndex") * expect(sym("]"), "CBracketIndex"));
  MethodStub    = tagC("MethodStub", sym(":" * -P":") * expect(V"StrId", "NameMeth"))
                + tagC("SafeMethodStub", sym("?:" * -P":") * expect(V"StrId", "NameMeth"));
  Call          = tagC("Call", V"FuncArgs")
                + tagC("SafeCall", P"?" * V"FuncArgs");
  SelfCall      = tagC("MethodStub", V"StrId") * V"Call";
  SelfIndex     = tagC("DotIndex", V"StrId");

  FuncDef   = (kw("function") * V"FuncBody");
  FuncArgs  = sym("(") * commaSep(V"Expr", "ArgList")^-1 * expect(sym(")"), "CParenArgs")
            + V"Table"
            + tagC("String", V"String");

  Table      = tagC("Table", sym("{") * V"FieldList"^-1 * expect(sym("}"), "CBraceTable"));
  FieldList  = sepBy(V"Field", V"FieldSep") * V"FieldSep"^-1;
  Field      = tagC("Pair", V"FieldKey" * expect(sym("="), "EqField") * expect(V"Expr", "ExprField"))
             + V"Expr";
  FieldKey   = sym("[" * -P(S"=[")) * expect(V"Expr", "ExprFKey") * expect(sym("]"), "CBracketFKey")
             + V"StrId" * #("=" * -P"=");
  FieldSep   = sym(",") + sym(";");

  TableCompr = tagC("TableCompr", sym("[") * V"Block" * expect(sym("]"), "CBracketTableCompr"));

  SelfId = tagC("Id", sym"@" / "self");
  Id     = tagC("Id", V"Name") + V"SelfId";
  StrId  = tagC("String", V"Name");

  -- lexer
  Skip     = (V"Space" + V"Comment")^0;
  Space    = space^1;
  Comment  = P"--" * V"LongStr" / function () return end
           + P"--" * (P(1) - P"\n")^0;

  Name      = token(-V"Reserved" * C(V"Ident"));
  Reserved  = V"Keywords" * -V"IdRest";
  Keywords  = P"and" + "break" + "do" + "elseif" + "else" + "end"
            + "false" + "for" + "function" + "goto" + "if" + "in"
            + "local" + "nil" + "not" + "or" + "repeat" + "return"
            + "then" + "true" + "until" + "while";
  Ident     = V"IdStart" * V"IdRest"^0;
  IdStart   = alpha + P"_";
  IdRest    = alnum + P"_";

  Number   = token(C(V"Hex" + V"Float" + V"Int"));
  Hex      = (P"0x" + "0X") * ((xdigit^0 * V"DeciHex") + (expect(xdigit^1, "DigitHex") * V"DeciHex"^-1)) * V"ExpoHex"^-1;
  Float    = V"Decimal" * V"Expo"^-1
           + V"Int" * V"Expo";
  Decimal  = digit^1 * "." * digit^0
           + P"." * -P"." * expect(digit^1, "DigitDeci");
  DeciHex  = P"." * xdigit^0;
  Expo     = S"eE" * S"+-"^-1 * expect(digit^1, "DigitExpo");
  -- NOTE(review): this accepts hexadecimal digits in the binary exponent,
  -- while Lua only allows decimal digits after 'p' -- confirm whether intended.
  ExpoHex  = S"pP" * S"+-"^-1 * expect(xdigit^1, "DigitExpo");
  Int      = digit^1;

  String    = token(V"ShortStr" + V"LongStr");

  ShortStr  = P'"' * Cs((V"EscSeq" + (P(1)-S'"\n'))^0) * expect(P'"', "Quote")
            + P"'" * Cs((V"EscSeq" + (P(1)-S"'\n"))^0) * expect(P"'", "Quote");

  EscSeq = P"\\" / ""  -- remove backslash
         * ( P"a" / "\a"
           + P"b" / "\b"
           + P"f" / "\f"
           + P"n" / "\n"
           + P"r" / "\r"
           + P"t" / "\t"
           + P"v" / "\v"

           + P"\n" / "\n"
           + P"\r" / "\n"

           + P"\\" / "\\"
           + P"\"" / "\""
           + P"\'" / "\'"

           + P"z" * space^0  / ""

           + digit * digit^-2 / tonumber / string.char
           + P"x" * expect(C(xdigit * xdigit), "HexEsc") * Cc(16) / tonumber / string.char
           + P"u" * expect("{", "OBraceUEsc")
                  * expect(C(xdigit^1), "DigitUEsc") * Cc(16)
                  * expect("}", "CBraceUEsc")
                  / tonumber
                  / (utf8 and utf8.char or string.char)  -- true max is \u{10FFFF}
                                                         -- utf8.char needs Lua 5.3
                                                         -- string.char works only until \u{FF}

           + throw("EscSeq")
           );

  LongStr  = V"Open" * C((P(1) - V"CloseEq")^0) * expect(V"Close", "CloseLStr") / function (s, eqs) return s end;
  Open     = "[" * Cg(V"Equals", "openEq") * "[" * P"\n"^-1;
  Close    = "]" * C(V"Equals") * "]";
  Equals   = P"="^0;
  CloseEq  = Cmt(V"Close" * Cb("openEq"), function (s, i, closeEq, openEq) return #openEq == #closeEq end);

  OrOp      = kw("or")   / "or";
  AndOp     = kw("and")  / "and";
  RelOp     = sym("~=")  / "ne"
            + sym("==")  / "eq"
            + sym("<=")  / "le"
            + sym(">=")  / "ge"
            + sym("<")   / "lt"
            + sym(">")   / "gt";
  BOrOp     = sym("|")   / "bor";
  BXorOp    = sym("~" * -P"=") / "bxor";
  BAndOp    = sym("&")   / "band";
  ShiftOp   = sym("<<")  / "shl"
            + sym(">>")  / "shr";
  ConcatOp  = sym("..")  / "concat";
  AddOp     = sym("+")   / "add"
            + sym("-")   / "sub";
  MulOp     = sym("*")   / "mul"
            + sym("//")  / "idiv"
            + sym("/")   / "div"
            + sym("%")   / "mod";
  UnaryOp   = kw("not")  / "not"
            + sym("-")   / "unm"
            + sym("#")   / "len"
            + sym("~")   / "bnot";
  PowOp     = sym("^")   / "pow";
  BinOp     = V"OrOp" + V"AndOp" + V"BOrOp" + V"BXorOp" + V"BAndOp" + V"ShiftOp" + V"ConcatOp" + V"AddOp" + V"MulOp" + V"PowOp";
}

local parser = {}

local validator = require("lib.lua-parser.validator")
local validate = validator.validate
local syntaxerror = validator.syntaxerror

-- Parses `subject` with the grammar above.
-- `filename` is only stored in the error-info table handed to the validator
-- helpers.
-- On a syntax error, returns the falsy match result followed by whatever
-- syntaxerror(errorinfo, errpos, errmsg) returns; otherwise returns whatever
-- validate(ast, errorinfo) returns.
function parser.parse (subject, filename)
  local errorinfo = { subject = subject, filename = filename }

  -- The context stacks are module-level and are only popped when the guarded
  -- pattern succeeds: a previous parse aborted by a thrown label would leave
  -- stale entries behind, so start every parse from empty stacks.
  for name in pairs(stacks) do
    stacks[name] = {}
  end

  lpeg.setmaxstack(1000)
  local ast, label, errpos = lpeg.match(G, subject, nil, errorinfo)
  if not ast then
    -- A failure that was not thrown by one of our labels has no entry in
    -- `labels`; report it with a generic message instead of indexing nil.
    local labelinfo = labels[label]
    local errmsg
    if labelinfo then
      errmsg = labelinfo[2]
    else
      errmsg = "syntax error (unlabelled failure: " .. tostring(label) .. ")"
    end
    return ast, syntaxerror(errorinfo, errpos, errmsg)
  end
  return validate(ast, errorinfo)
end

return parser