From a46ac380e8c39975c7d2e8ac937c02023b783493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Reuh=20Fildadut?= Date: Wed, 3 Jan 2024 17:59:44 +0100 Subject: [PATCH] Only serialize referenced upvalues in function serialization --- anselme/ast/Environment.lua | 7 ++---- anselme/ast/Function.lua | 46 +++++++++++++++++++++++++------------ anselme/ast/LuaFunction.lua | 8 +++++++ ideas.md | 7 ------ 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/anselme/ast/Environment.lua b/anselme/ast/Environment.lua index fbd55cc..ab8eacd 100644 --- a/anselme/ast/Environment.lua +++ b/anselme/ast/Environment.lua @@ -85,13 +85,14 @@ local Environment = ast.abstract.Runtime { self._lookup_cache = {} self._lookup_cache_current = {} end, - -- precache variable + -- precache variable and return its variable metadata -- when cached, if a variable is defined in a parent scope after it has been cached here from a higher parent, it will not be used in this env -- most of the time scopes are discarded after a pop so there's no possibility for this anyway, except for closures as they restore an old environment -- in which case we may want to precache variables that appear in the function so future definitions don't affect the closure precache = function(self, state, identifier) self:_lookup(state, identifier) self:_lookup_in_current(state, identifier:to_symbol()) + return self:_lookup(state, identifier) end, traverse = function(self, fn, ...) 
@@ -194,10 +195,6 @@ local Environment = ast.abstract.Runtime { defined_in_current = function(self, state, symbol) return self:_lookup_in_current(state, symbol) ~= nil end, - -- return bool if variable is defined in the current environment only - won't search in parent env for exported & partial names - defined_in_current_strict = function(self, state, identifier) - return self.variables:has(state, identifier) and not self.variables:get(state, identifier):undefined(state) - end, -- get variable in current or parent scope, with metadata _get_variable = function(self, state, identifier) diff --git a/anselme/ast/Function.lua b/anselme/ast/Function.lua index a122a19..a3bb771 100644 --- a/anselme/ast/Function.lua +++ b/anselme/ast/Function.lua @@ -2,19 +2,19 @@ local ast = require("anselme.ast") local Overloadable = ast.abstract.Overloadable -local ReturnBoundary +local ReturnBoundary, Environment local operator_priority = require("anselme.common").operator_priority local resume_manager, calling_environment_manager -local function list_upvalues(v, l) +local function list_cache_upvalues(v, state, list, scope) if ast.Identifier:is(v) then - table.insert(l, v) + list[v.name] = scope:precache(state, v) elseif ast.Symbol:is(v) then - table.insert(l, v:to_identifier()) + list[v.string] = scope:precache(state, v:to_identifier()) end - v:traverse(list_upvalues, l) + v:traverse(list_cache_upvalues, state, list, scope) end local Function @@ -24,13 +24,14 @@ Function = Overloadable { parameters = nil, -- ParameterTuple expression = nil, -- function content scope = nil, -- Environment; captured scope for closure (evaluated functions); not set when not evaluated - upvalues = nil, -- list of identifiers; not set when not evaluated. Contain _at least_ all the upvalues explicitely defined in the function code. + upvalues = nil, -- {[name]=variable metadata}; not set when not evaluated. Contain _at least_ all the upvalues explicitly defined in the function code.
init = function(self, parameters, expression, scope, upvalues) self.parameters = parameters self.expression = expression self.scope = scope self.upvalues = upvalues + if self.scope then self._evaluated = true end end, with_return_boundary = function(self, parameters, expression) return Function:new(parameters, ReturnBoundary:new(expression)) @@ -62,16 +63,11 @@ Function = Overloadable { local scope = state.scope:capture() -- capture current scope to build closure state.scope:pop() - -- get upvalues + -- list & cache upvalues so they aren't affected by future redefinition in a parent scope local upvalues = {} - self.expression:traverse(list_upvalues, upvalues) + self.expression:traverse(list_cache_upvalues, state, upvalues, scope) if scope:defined(state, ast.Identifier:new("_")) then - scope:get(state, ast.Identifier:new("_")):traverse(list_upvalues, upvalues) - end - - -- cache upvalues so they aren't affected by future redefinition in a parent scope - for _, ident in ipairs(upvalues) do - scope:precache(state, ident) + scope:get(state, ast.Identifier:new("_")):traverse(list_cache_upvalues, state, upvalues, scope) end return Function:new(self.parameters:eval(state), self.expression, scope, upvalues) @@ -124,10 +120,30 @@ Function = Overloadable { state.scope:pop() return exp end, + + -- Note: when serializing and reloading a function, its upvalues will not be linked anymore to their original definition. + -- The reloaded function will not be able to affect variables defined outside its body. + -- Only the upvalues that explicitly appear in the function body will be saved, so we don't have to keep a copy of the whole environment.
+ _serialize = function(self) + return { parameters = self.parameters, expression = self.expression, upvalues = self.upvalues } + end, + _deserialize = function(self) + local state = require("anselme.serializer_state") + local scope + if self.upvalues then + -- rebuild scope: exported + normal layer so any upvalue that happens to be exported stays there + -- (and link again to current scope to allow internal vars that are not considered explicit upvalues to still work, like _translations) + scope = Environment:new(state, Environment:new(state, state.scope:capture(), nil, true)) + for _, var in pairs(self.upvalues) do + scope:define(state, var:get_symbol(), var:get(state)) + end + end + return Function:new(self.parameters, self.expression, Environment:new(state, scope), self.upvalues) + end } package.loaded[...] = Function -ReturnBoundary = ast.ReturnBoundary +ReturnBoundary, Environment = ast.ReturnBoundary, ast.Environment resume_manager = require("anselme.state.resume_manager") calling_environment_manager = require("anselme.state.calling_environment_manager") diff --git a/anselme/ast/LuaFunction.lua b/anselme/ast/LuaFunction.lua index 9d87f7c..a2c432e 100644 --- a/anselme/ast/LuaFunction.lua +++ b/anselme/ast/LuaFunction.lua @@ -75,6 +75,14 @@ LuaFunction = ast.abstract.Runtime(Overloadable) { to_lua = function(self, state) return self.func end, + + -- TODO: binser does not serialize lua function upvalues! + _serialize = function(self) + error("LuaFunction can not be serialized") + end, + _deserialize = function(self) + error("LuaFunction can not be serialized") + end } package.loaded[...] = LuaFunction diff --git a/ideas.md b/ideas.md index 4600a14..279c77a 100644 --- a/ideas.md +++ b/ideas.md @@ -22,13 +22,6 @@ Do some more fancy scope work to allow the translation to access variables defin --- -Persistence "issue": Storing a closure stores it whole environment, which includes all the stdlib. Technically it works, but that's a lot of useless information.
Would need to track which variable is used (should be doable in prepare) and prune the closure (list identifiers and symbols used in children and regroup in a single exported+normal layer). The closure would also captures things like _translations that should not be persisted and prevent any update to it or its upvalues (the captured scope in the closure will not be able to be linked with the real scope in the reloaded script)... -Or register all functions as ressources in binser - that makes kinda sense, they're immutable, and their signature should be unique. Would need to track which functions are safe to skip / can be reloaded from somewhere on load. Would need to distinguish anonymous from non anonymous functions... - -Or just say closures probably shouldn't be persisted. Yeah, probably easier. - ---- - Standard library. * Text manipulation would make sense, but that would require a full UTF-8/Unicode support library like https://github.com/starwing/luautf8.