--[[
This module implements a parser for Lua 5.3 with LPeg,
and generates an Abstract Syntax Tree that is similar to the one generated by Metalua.
For more information about Metalua, please, visit:
https://github.com/fab13n/metalua-parser

block: { stat* }

stat:
    `Do{ stat* }
  | `Set{ {lhs+} {expr+} }                    -- lhs1, lhs2... = e1, e2...
  | `While{ expr block }                      -- while e do b end
  | `Repeat{ block expr }                     -- repeat b until e
  | `If{ (expr block)+ block? }               -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
  | `Fornum{ ident expr expr expr? block }    -- for ident = e, e[, e] do b end
  | `Forin{ {ident+} {expr+} block }          -- for i1, i2... in e1, e2... do b end
  | `Local{ {ident+} {expr+}? }               -- local i1, i2... = e1, e2...
  | `Localrec{ ident expr }                   -- only used for 'local function'
  | `Goto{ <string> }                         -- goto str
  | `Label{ <string> }                        -- ::str::
  | `Return{ <expr*> }                        -- return e1, e2...
  | `Break                                    -- break
  | apply

expr:
    `Nil
  | `Dots
  | `Boolean{ <boolean> }
  | `Number{ <number> }
  | `String{ <string> }
  | `Function{ { `Id{ <string> }* `Dots? } block }
  | `Table{ ( `Pair{ expr expr } | expr )* }
  | `Op{ opid expr expr? }
  | `Paren{ expr }       -- significant to cut multiple values returns
  | apply
  | lhs

apply:
    `Call{ expr expr* }
  | `Invoke{ expr `String{ <string> } expr* }

lhs: `Id{ <string> } | `Index{ expr expr }

opid:  -- includes additional operators from Lua 5.3 and all relational operators
    'add'  | 'sub' | 'mul'  | 'div'
  | 'idiv' | 'mod' | 'pow'  | 'concat'
  | 'band' | 'bor' | 'bxor' | 'shl' | 'shr'
  | 'eq'   | 'ne'  | 'lt'   | 'gt'  | 'le'   | 'ge'
  | 'and'  | 'or'  | 'unm'  | 'len' | 'bnot' | 'not'

NOTE: the grammar below additionally wraps several lists in tagged nodes of
its own (`Block`, `NameList`, `VarList`, `ExpList`, `NamedParList`, `ParPair`),
accepts `name = expr` entries in parameter lists, and accepts an optional
binary operator in front of `=` in assignments. None of that is described by
the summary above.
]]

local lpeg = require "lpeglabel"

lpeg.locale(lpeg)

local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc
local Cf, Cg, Cmt, Cp, Cs, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
local Lc, T = lpeg.Lc, lpeg.T

local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum
local xdigit = lpeg.xdigit
local space = lpeg.space

-- error message auxiliary functions

-- Each entry is { label name, human readable message }. The position of an
-- entry in this list is the numeric label thrown by the grammar (see `throw`),
-- so the order of the entries must stay in sync with what `parser.parse` reads.
local labels = {
  { "ErrExtra", "unexpected character(s), expected EOF" },
  { "ErrInvalidStat", "unexpected token, invalid start of statement" },

  { "ErrEndIf", "expected 'end' to close the if statement" },
  { "ErrExprIf", "expected a condition after 'if'" },
  { "ErrThenIf", "expected 'then' after the condition" },
  { "ErrExprEIf", "expected a condition after 'elseif'" },
  { "ErrThenEIf", "expected 'then' after the condition" },

  { "ErrEndDo", "expected 'end' to close the do block" },
  { "ErrExprWhile", "expected a condition after 'while'" },
  { "ErrDoWhile", "expected 'do' after the condition" },
  { "ErrEndWhile", "expected 'end' to close the while loop" },
  { "ErrUntilRep", "expected 'until' at the end of the repeat loop" },
  { "ErrExprRep", "expected a conditions after 'until'" },

  { "ErrForRange", "expected a numeric or generic range after 'for'" },
  { "ErrEndFor", "expected 'end' to close the for loop" },
  { "ErrExprFor1", "expected a starting expression for the numeric range" },
  { "ErrCommaFor", "expected ',' to split the start and end of the range" },
  { "ErrExprFor2", "expected an ending expression for the numeric range" },
  { "ErrExprFor3", "expected a step expression for the numeric range after ','" },
  { "ErrInFor", "expected '=' or 'in' after the variable(s)" },
  { "ErrEListFor", "expected one or more expressions after 'in'" },
  { "ErrDoFor", "expected 'do' after the range of the for loop" },

  { "ErrDefLocal", "expected a function definition or assignment after local" },
  { "ErrNameLFunc", "expected a function name after 'function'" },
  { "ErrEListLAssign", "expected one or more expressions after '='" },
  { "ErrEListAssign", "expected one or more expressions after '='" },

  { "ErrFuncName", "expected a function name after 'function'" },
  { "ErrNameFunc1", "expected a function name after '.'" },
  { "ErrNameFunc2", "expected a method name after ':'" },
  { "ErrOParenPList", "expected '(' for the parameter list" },
  { "ErrCParenPList", "expected ')' to close the parameter list" },
  { "ErrEndFunc", "expected 'end' to close the function body" },
  { "ErrParList", "expected a variable name or '...' after ','" },

  { "ErrLabel", "expected a label name after '::'" },
  { "ErrCloseLabel", "expected '::' after the label" },
  { "ErrGoto", "expected a label after 'goto'" },
  { "ErrRetList", "expected an expression after ',' in the return statement" },

  { "ErrVarList", "expected a variable name after ','" },
  { "ErrExprList", "expected an expression after ','" },

  { "ErrOrExpr", "expected an expression after 'or'" },
  { "ErrAndExpr", "expected an expression after 'and'" },
  { "ErrRelExpr", "expected an expression after the relational operator" },
  { "ErrBOrExpr", "expected an expression after '|'" },
  { "ErrBXorExpr", "expected an expression after '~'" },
  { "ErrBAndExpr", "expected an expression after '&'" },
  { "ErrShiftExpr", "expected an expression after the bit shift" },
  { "ErrConcatExpr", "expected an expression after '..'" },
  { "ErrAddExpr", "expected an expression after the additive operator" },
  { "ErrMulExpr", "expected an expression after the multiplicative operator" },
  { "ErrUnaryExpr", "expected an expression after the unary operator" },
  { "ErrPowExpr", "expected an expression after '^'" },

  { "ErrExprParen", "expected an expression after '('" },
  { "ErrCParenExpr", "expected ')' to close the expression" },
  { "ErrNameIndex", "expected a field name after '.'" },
  { "ErrExprIndex", "expected an expression after '['" },
  { "ErrCBracketIndex", "expected ']' to close the indexing expression" },
  { "ErrNameMeth", "expected a method name after ':'" },
  { "ErrMethArgs", "expected some arguments for the method call (or '()')" },

  { "ErrArgList", "expected an expression after ',' in the argument list" },
  { "ErrCParenArgs", "expected ')' to close the argument list" },

  { "ErrCBraceTable", "expected '}' to close the table constructor" },
  { "ErrEqField", "expected '=' after the table key" },
  { "ErrExprField", "expected an expression after '='" },
  { "ErrExprFKey", "expected an expression after '[' for the table key" },
  { "ErrCBracketFKey", "expected ']' to close the table key" },

  { "ErrDigitHex", "expected one or more hexadecimal digits after '0x'" },
  { "ErrDigitDeci", "expected one or more digits after the decimal point" },
  { "ErrDigitExpo", "expected one or more digits for the exponent" },

  { "ErrQuote", "unclosed string" },
  { "ErrHexEsc", "expected exactly two hexadecimal digits after '\\x'" },
  { "ErrOBraceUEsc", "expected '{' after '\\u'" },
  { "ErrDigitUEsc", "expected one or more hexadecimal digits for the UTF-8 code point" },
  { "ErrCBraceUEsc", "expected '}' after the code point" },
  { "ErrEscSeq", "invalid escape sequence" },
  { "ErrCloseLStr", "unclosed long string" },
}

-- Builds a pattern that throws the label registered as "Err" .. label.
-- The thrown value is the index of that entry in `labels`.
-- Raises a Lua error (at grammar construction time) when the name is unknown.
local function throw(label)
  label = "Err" .. label
  for i, labelinfo in ipairs(labels) do
    if labelinfo[1] == label then
      return T(i)
    end
  end

  error("Label not found: " .. label)
end

-- Matches `patt`; when it does not match, throws `label` instead.
local function expect (patt, label)
  return patt + throw(label)
end


-- regular combinators and auxiliary functions

-- Matches `patt` followed by the "Skip" rule (whitespace and comments).
local function token (patt)
  return patt * V"Skip"
end

-- Token for the literal symbol `str`.
local function sym (str)
  return token(P(str))
end

-- Token for the keyword `str`: the literal must not be followed by an
-- identifier character, so that e.g. "dos" is not read as keyword "do".
local function kw (str)
  return token(P(str) * -V"IdRest")
end

-- Table capture holding the captures of `patt` in its array part, plus the
-- fields `pos` (position where the match started) and `tag`.
local function tagC (tag, patt)
  return Ct(Cg(Cp(), "pos") * Cg(Cc(tag), "tag") * patt)
end

-- Builds an `Op node for the unary operator `op` applied to `e`.
local function unaryOp (op, e)
  return { tag = "Op", pos = e.pos, [1] = op, [2] = e }
end

-- Builds an `Op node for `e1 op e2`. When no operator was captured
-- (optional right-hand side absent) the left operand is returned untouched.
local function binaryOp (e1, op, e2)
  if not op then
    return e1
  else
    return { tag = "Op", pos = e1.pos, [1] = op, [2] = e1, [3] = e2 }
  end
end

-- One or more `patt` separated by `sep`. Each "sep patt" repetition is
-- grouped so that a fold receives its captures together. When `label` is
-- given, a separator that is not followed by `patt` throws that label.
local function sepBy (patt, sep, label)
  if label then
    return patt * Cg(sep * expect(patt, label))^0
  else
    return patt * Cg(sep * patt)^0
  end
end

-- Folds a `sep`-separated chain of `patt` into nested `Op nodes via binaryOp.
local function chainOp (patt, sep, label)
  return Cf(sepBy(patt, sep, label), binaryOp)
end

-- Comma separated list of `patt`.
local function commaSep (patt, label)
  return sepBy(patt, sym(","), label)
end

-- Retags a captured block as a `Do node (mutates and returns `block`).
local function tagDo (block)
  block.tag = "Do"
  return block
end

-- Post-processes a function statement node: when the function name was marked
-- as a method, an `Id{ "self" } is prepended to the parameter list; then both
-- the name and the function are wrapped in single-element lists.
local function fixFuncStat (func)
  if func[1].is_method then table.insert(func[2][1], 1, { tag = "Id", [1] = "self" }) end
  func[1] = {func[1]}
  func[2] = {func[2]}
  return func
end

-- Appends the optional `dots` node to `params` and returns `params`.
local function addDots (params, dots)
  if dots then table.insert(params, dots) end
  return params
end

-- Builds an `Index node `t[index]` positioned at `t`.
local function insertIndex (t, index)
  return { tag = "Index", pos = t.pos, [1] = t, [2] = index }
end

-- When a method name was captured, builds an `Index node flagged with
-- `is_method = true`; otherwise returns `t` unchanged.
local function markMethod(t, method)
  if method then
    return { tag = "Index", pos = t.pos, is_method = true, [1] = t, [2] = method }
  end
  return t
end

-- Fold step for suffixed expressions: `t1` is the expression built so far and
-- `t2` the next suffix. Call/Invoke suffixes produce a node of the same tag
-- with `t1` as first child followed by the suffix's children; any other
-- suffix produces an `Index node using the suffix's first child as key.
local function makeIndexOrCall (t1, t2)
  if t2.tag == "Call" or t2.tag == "Invoke" then
    local t = { tag = t2.tag, pos = t1.pos, [1] = t1 }
    for k, v in ipairs(t2) do
      table.insert(t, v)
    end
    return t
  end
  return { tag = "Index", pos = t1.pos, [1] = t1, [2] = t2[1] }
end

-- grammar
local G = { V"Lua",
  Lua      = V"Shebang"^-1 * V"Skip" * V"Block" * expect(P(-1), "Extra");
  Shebang  = P"#!" * (P(1) - P"\n")^0;

  Block       = tagC("Block", V"Stat"^0 * V"RetStat"^-1);
  Stat        = V"IfStat" + V"DoStat" + V"WhileStat" + V"RepeatStat" + V"ForStat"
              + V"LocalStat" + V"FuncStat" + V"BreakStat" + V"LabelStat" + V"GoToStat"
              + V"FuncCall" + V"Assignment" + sym(";") + -V"BlockEnd" * throw("InvalidStat");
  BlockEnd    = P"return" + "end" + "elseif" + "else" + "until" + -1;

  IfStat      = tagC("If", V"IfPart" * V"ElseIfPart"^0 * V"ElsePart"^-1 * expect(kw("end"), "EndIf"));
  IfPart      = kw("if") * expect(V"Expr", "ExprIf") * expect(kw("then"), "ThenIf") * V"Block";
  ElseIfPart  = kw("elseif") * expect(V"Expr", "ExprEIf") * expect(kw("then"), "ThenEIf") * V"Block";
  ElsePart    = kw("else") * V"Block";

  DoStat      = kw("do") * V"Block" * expect(kw("end"), "EndDo") / tagDo;
  WhileStat   = tagC("While", kw("while") * expect(V"Expr", "ExprWhile") * V"WhileBody");
  WhileBody   = expect(kw("do"), "DoWhile") * V"Block" * expect(kw("end"), "EndWhile");
  RepeatStat  = tagC("Repeat", kw("repeat") * V"Block" * expect(kw("until"), "UntilRep") * expect(V"Expr", "ExprRep"));

  ForStat   = kw("for") * expect(V"ForNum" + V"ForIn", "ForRange") * expect(kw("end"), "EndFor");
  ForNum    = tagC("Fornum", V"Id" * sym("=") * V"NumRange" * V"ForBody");
  NumRange  = expect(V"Expr", "ExprFor1") * expect(sym(","), "CommaFor") *expect(V"Expr", "ExprFor2")
            * (sym(",") * expect(V"Expr", "ExprFor3"))^-1;
  ForIn     = tagC("Forin", V"NameList" * expect(kw("in"), "InFor") * expect(V"ExprList", "EListFor") * V"ForBody");
  ForBody   = expect(kw("do"), "DoFor") * V"Block";

  LocalStat    = kw("local") * expect(V"LocalFunc" + V"LocalAssign", "DefLocal");
  LocalFunc    = tagC("Localrec", kw("function") * expect(V"Id", "NameLFunc") * V"FuncBody") / fixFuncStat;
  LocalAssign  = tagC("Local", V"NameList" * (sym("=") * expect(V"ExprList", "EListLAssign") + Ct(Cc())));
  Assignment   = tagC("Set", V"VarList" * V"AssignmentOp" * expect(V"ExprList", "EListAssign"));

  FuncStat    = tagC("Set", kw("function") * expect(V"FuncName", "FuncName") * V"FuncBody") / fixFuncStat;
  FuncName    = Cf(V"Id" * (sym(".") * expect(V"StrId", "NameFunc1"))^0, insertIndex)
              * (sym(":") * expect(V"StrId", "NameFunc2"))^-1 / markMethod;
  FuncBody    = tagC("Function", V"FuncParams" * V"Block" * expect(kw("end"), "EndFunc"));
  FuncParams  = expect(sym("("), "OParenPList") * V"ParList" * expect(sym(")"), "CParenPList");
  ParList     = V"NamedParList" * (sym(",") * expect(tagC("Dots", sym("...")), "ParList"))^-1 / addDots
              + Ct(tagC("Dots", sym("...")))
              + Ct(Cc()); -- Cc({}) generates a bug since the {} would be shared across parses

  NamedParList = tagC("NamedParList", commaSep(V"NamedPar"));
  NamedPar     = tagC("ParPair", V"ParKey" * expect(sym("="), "EqField") * expect(V"Expr", "ExprField"))
               + V"Id";
  ParKey       = V"Id" * #("=" * -P"=");

  LabelStat  = tagC("Label", sym("::") * expect(V"Name", "Label") * expect(sym("::"), "CloseLabel"));
  GoToStat   = tagC("Goto", kw("goto") * expect(V"Name", "Goto"));
  BreakStat  = tagC("Break", kw("break"));
  RetStat    = tagC("Return", kw("return") * commaSep(V"Expr", "RetList")^-1 * sym(";")^-1);

  NameList  = tagC("NameList", commaSep(V"Id"));
  VarList   = tagC("VarList", commaSep(V"VarExpr", "VarList"));
  ExprList  = tagC("ExpList", commaSep(V"Expr", "ExprList"));

  Expr        = V"OrExpr";
  OrExpr      = chainOp(V"AndExpr", V"OrOp", "OrExpr");
  AndExpr     = chainOp(V"RelExpr", V"AndOp", "AndExpr");
  RelExpr     = chainOp(V"BOrExpr", V"RelOp", "RelExpr");
  BOrExpr     = chainOp(V"BXorExpr", V"BOrOp", "BOrExpr");
  BXorExpr    = chainOp(V"BAndExpr", V"BXorOp", "BXorExpr");
  BAndExpr    = chainOp(V"ShiftExpr", V"BAndOp", "BAndExpr");
  ShiftExpr   = chainOp(V"ConcatExpr", V"ShiftOp", "ShiftExpr");
  ConcatExpr  = V"AddExpr" * (V"ConcatOp" * expect(V"ConcatExpr", "ConcatExpr"))^-1 / binaryOp;
  AddExpr     = chainOp(V"MulExpr", V"AddOp", "AddExpr");
  MulExpr     = chainOp(V"UnaryExpr", V"MulOp", "MulExpr");
  UnaryExpr   = V"UnaryOp" * expect(V"UnaryExpr", "UnaryExpr") / unaryOp
              + V"PowExpr";
  PowExpr     = V"SimpleExpr" * (V"PowOp" * expect(V"UnaryExpr", "PowExpr"))^-1 / binaryOp;

  SimpleExpr = tagC("Number", V"Number")
             + tagC("String", V"String")
             + tagC("Nil", kw("nil"))
             + tagC("Boolean", kw("false") * Cc(false))
             + tagC("Boolean", kw("true") * Cc(true))
             + tagC("Dots", sym("..."))
             + V"FuncDef"
             + V"Table"
             + V"SuffixedExpr";

  FuncCall  = Cmt(V"SuffixedExpr", function(s, i, exp) return exp.tag == "Call" or exp.tag == "Invoke", exp end);
  VarExpr   = Cmt(V"SuffixedExpr", function(s, i, exp) return exp.tag == "Id" or exp.tag == "Index", exp end);

  SuffixedExpr  = Cf(V"PrimaryExpr" * (V"Index" + V"Call")^0, makeIndexOrCall);
  PrimaryExpr   = V"Id" + tagC("Paren", sym("(") * expect(V"Expr", "ExprParen") * expect(sym(")"), "CParenExpr"));
  Index         = tagC("DotIndex", sym("." * -P".") * expect(V"StrId", "NameIndex"))
                + tagC("ArrayIndex", sym("[" * -P(S"=[")) * expect(V"Expr", "ExprIndex") * expect(sym("]"), "CBracketIndex"));
  Call          = tagC("Invoke", Cg(sym(":" * -P":") * expect(V"StrId", "NameMeth") * expect(V"FuncArgs", "MethArgs")))
                + tagC("Call", V"FuncArgs");

  FuncDef   = kw("function") * V"FuncBody";
  FuncArgs  = sym("(") * commaSep(V"Expr", "ArgList")^-1 * expect(sym(")"), "CParenArgs")
            + V"Table"
            + tagC("String", V"String");

  Table      = tagC("Table", sym("{") * V"FieldList"^-1 * expect(sym("}"), "CBraceTable"));
  FieldList  = sepBy(V"Field", V"FieldSep") * V"FieldSep"^-1;
  Field      = tagC("Pair", V"FieldKey" * expect(sym("="), "EqField") * expect(V"Expr", "ExprField"))
             + V"Expr";
  FieldKey   = sym("[" * -P(S"=[")) * expect(V"Expr", "ExprFKey") * expect(sym("]"), "CBracketFKey")
             + V"StrId" * #("=" * -P"=");
  FieldSep   = sym(",") + sym(";");

  Id     = tagC("Id", V"Name");
  StrId  = tagC("String", V"Name");

  -- lexer
  Skip     = (V"Space" + V"Comment")^0;
  Space    = space^1;
  Comment  = P"--" * V"LongStr" / function () return end
           + P"--" * (P(1) - P"\n")^0;

  Name      = token(-V"Reserved" * C(V"Ident"));
  Reserved  = V"Keywords" * -V"IdRest";
  Keywords  = P"and" + "break" + "do" + "elseif" + "else" + "end"
            + "false" + "for" + "function" + "goto" + "if" + "in"
            + "local" + "nil" + "not" + "or" + "repeat" + "return"
            + "then" + "true" + "until" + "while";
  Ident     = V"IdStart" * V"IdRest"^0;
  IdStart   = alpha + P"_";
  IdRest    = alnum + P"_";

  Number   = token((V"Hex" + V"Float" + V"Int") / tonumber);
  Hex      = (P"0x" + "0X") * expect(xdigit^1, "DigitHex");
  Float    = V"Decimal" * V"Expo"^-1
           + V"Int" * V"Expo";
  Decimal  = digit^1 * "." * digit^0
           + P"." * -P"." * expect(digit^1, "DigitDeci");
  Expo     = S"eE" * S"+-"^-1 * expect(digit^1, "DigitExpo");
  Int      = digit^1;

  String    = token(V"ShortStr" + V"LongStr");
  ShortStr  = P'"' * Cs((V"EscSeq" + (P(1)-S'"\n'))^0) * expect(P'"', "Quote")
            + P"'" * Cs((V"EscSeq" + (P(1)-S"'\n"))^0) * expect(P"'", "Quote");

  EscSeq = P"\\" / ""  -- remove backslash
         * ( P"a" / "\a"
           + P"b" / "\b"
           + P"f" / "\f"
           + P"n" / "\n"
           + P"r" / "\r"
           + P"t" / "\t"
           + P"v" / "\v"

           + P"\n" / "\n"
           + P"\r" / "\n"

           + P"\\" / "\\"
           + P"\"" / "\""
           + P"\'" / "\'"

           + P"z" * space^0  / ""

           + digit * digit^-2 / tonumber / string.char
           + P"x" * expect(C(xdigit * xdigit), "HexEsc") * Cc(16) / tonumber / string.char
           + P"u" * expect("{", "OBraceUEsc")
                  * expect(C(xdigit^1), "DigitUEsc") * Cc(16)
                  * expect("}", "CBraceUEsc")
                  / tonumber
                  / (utf8 and utf8.char or string.char)  -- true max is \u{10FFFF}
                                                         -- utf8.char needs Lua 5.3
                                                         -- string.char works only until \u{FF}

           + throw("EscSeq")
           );

  LongStr  = V"Open" * C((P(1) - V"CloseEq")^0) * expect(V"Close", "CloseLStr") / function (s, eqs) return s end;
  Open     = "[" * Cg(V"Equals", "openEq") * "[" * P"\n"^-1;
  Close    = "]" * C(V"Equals") * "]";
  Equals   = P"="^0;
  CloseEq  = Cmt(V"Close" * Cb("openEq"), function (s, i, closeEq, openEq) return #openEq == #closeEq end);

  OrOp      = kw("or")   / "or";
  AndOp     = kw("and")  / "and";
  RelOp     = sym("~=")  / "ne"
            + sym("==")  / "eq"
            + sym("<=")  / "le"
            + sym(">=")  / "ge"
            + sym("<")   / "lt"
            + sym(">")   / "gt";
  BOrOp     = sym("|")   / "bor";
  BXorOp    = sym("~" * -P"=") / "bxor";
  BAndOp    = sym("&")   / "band";
  ShiftOp   = sym("<<")  / "shl"
            + sym(">>")  / "shr";
  ConcatOp  = sym("..")  / "concat";
  AddOp     = sym("+")   / "add"
            + sym("-")   / "sub";
  MulOp     = sym("*")   / "mul"
            + sym("//")  / "idiv"
            + sym("/")   / "div"
            + sym("%")   / "mod";
  UnaryOp   = kw("not")  / "not"
            + sym("-")   / "unm"
            + sym("#")   / "len"
            + sym("~")   / "bnot";
  PowOp     = sym("^")   / "pow";

  -- Optional binary operator directly in front of '=' (compound assignment).
  AssignmentOp = (V"OrOp" + V"AndOp" + V"BOrOp" + V"BXorOp" + V"BAndOp" + V"ShiftOp"
               + V"ConcatOp" + V"AddOp" + V"MulOp" + V"PowOp")^-1 * sym("=")
}

local parser = {}

local validator = require("lua-parser.validator")
local validate = validator.validate
local syntaxerror = validator.syntaxerror

--- Parses Lua source text into an AST.
-- @param subject  the source code, as a string
-- @param filename name stored in the error info handed to the validator
-- @return on success, whatever `validate(ast, errorinfo)` returns;
--         on a syntax error, nil followed by the result of `syntaxerror`
function parser.parse (subject, filename)
  local errorinfo = { subject = subject, filename = filename }
  lpeg.setmaxstack(1000)
  local ast, label, sfail = lpeg.match(G, subject, nil, errorinfo)
  if not ast then
    -- The third result of a failed match is either the unmatched suffix of
    -- the subject (a string) or the error position itself (a number),
    -- depending on the LPegLabel release in use; accept both.
    local errpos
    if type(sfail) == "number" then
      errpos = sfail
    elseif type(sfail) == "string" then
      errpos = #subject - #sfail + 1
    else
      errpos = 1
    end
    -- An ordinary (unlabeled) failure does not map to an entry of `labels`;
    -- report a generic message instead of indexing nil.
    local labelinfo = labels[label]
    local errmsg = labelinfo and labelinfo[2] or "syntax error"
    return ast, syntaxerror(errorinfo, errpos, errmsg)
  end
  return validate(ast, errorinfo)
end

return parser