mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Scribunto
synced 2024-11-30 11:04:17 +00:00
84d96e843c
The LuaStandalone interpreter needs to keep a mapping from integers returned to PHP to the corresponding function. But if it never releases these functions when PHP no longer has any reference to them, it can result in Lua running out of memory if a module with a large number of functions is invoked many times in one page. The fix here is to track which function ids are referenced from PHP, and periodically send the list to Lua so it can remove any that are no longer used from its cache. This also takes care of another issue where having multiple interpreter instances and passing function objects from one into another could call the wrong function in Lua. Bug: 51886 Change-Id: I4f15841051f7748d1d6df24080949e5cbd88f217
654 lines
17 KiB
Lua
654 lines
17 KiB
Lua
MWServer = {}
|
|
|
|
--- Create a new MWServer object
|
|
function MWServer:new( interpreterId )
|
|
interpreterId = tonumber( interpreterId )
|
|
if not interpreterId then
|
|
error( "bad argument #1 to 'MWServer:new' (must be a number or convertible to a number)", 2 )
|
|
end
|
|
|
|
obj = {
|
|
interpreterId = interpreterId,
|
|
nextChunkId = 1,
|
|
chunks = {},
|
|
xchunks = {},
|
|
protectedFunctions = {},
|
|
protectedEnvironments = {},
|
|
baseEnv = {}
|
|
}
|
|
setmetatable( obj, self )
|
|
self.__index = self
|
|
|
|
obj:init()
|
|
|
|
return obj
|
|
end
|
|
|
|
--- Initialise a new MWServer object
|
|
function MWServer:init()
|
|
self.baseEnv = self:newEnvironment()
|
|
for funcName, func in pairs( self ) do
|
|
if type(func) == 'function' then
|
|
self.protectedFunctions[func] = true
|
|
end
|
|
end
|
|
self.protectedEnvironments[_G] = true
|
|
end
|
|
|
|
--- Serve requests until exit is requested
|
|
function MWServer:execute()
|
|
self:dispatch( nil )
|
|
self:debug( 'MWServer:execute: returning' )
|
|
end
|
|
|
|
-- Convert a multiple-return-value or a ... into a count and a table
|
|
function MWServer:listToCountAndTable( ... )
|
|
return select( '#', ... ), { ... }
|
|
end
|
|
|
|
--- Call a PHP function
|
|
-- Raise an error if the PHP handler requests it. May return any number
|
|
-- of values.
|
|
--
|
|
-- @param id The function ID, specified by a registerLibrary message
|
|
-- @param nargs Count of function arguments
|
|
-- @param args The function arguments
|
|
-- @return The return values from the PHP function
|
|
function MWServer:call( id, nargs, args )
|
|
local result = self:dispatch( {
|
|
op = 'call',
|
|
id = id,
|
|
nargs = nargs,
|
|
args = args
|
|
} )
|
|
if result.op == 'return' then
|
|
return unpack( result.values, 1, result.nvalues )
|
|
elseif result.op == 'error' then
|
|
-- Raise an error in the actual user code that called the function
|
|
-- The level is 3 since our immediate caller is a closure
|
|
error( result.value, 3 )
|
|
else
|
|
self:internalError( 'MWServer:call: unexpected result op' )
|
|
end
|
|
end
|
|
|
|
--- Handle a "call" message from PHP. Call the relevant function.
|
|
--
|
|
-- @param message The message from PHP
|
|
-- @return A response message to send back to PHP
|
|
function MWServer:handleCall( message )
|
|
if not self.chunks[message.id] then
|
|
return {
|
|
op = 'error',
|
|
value = 'function id ' .. message.id .. ' does not exist'
|
|
}
|
|
end
|
|
|
|
local n, result = self:listToCountAndTable( xpcall(
|
|
function ()
|
|
return self.chunks[message.id]( unpack( message.args, 1, message.nargs ) )
|
|
end,
|
|
function ( err )
|
|
return MWServer:attachTrace( err )
|
|
end
|
|
) )
|
|
|
|
if result[1] then
|
|
-- table.remove( result, 1 ) renumbers from 2 to #result. But #result
|
|
-- is not necessarily "right" if result contains nils.
|
|
result = { unpack( result, 2, n ) }
|
|
return {
|
|
op = 'return',
|
|
nvalues = n - 1,
|
|
values = result
|
|
}
|
|
else
|
|
if result[2].value and result[2].trace then
|
|
return {
|
|
op = 'error',
|
|
value = result[2].value,
|
|
trace = result[2].trace,
|
|
}
|
|
else
|
|
return {
|
|
op = 'error',
|
|
value = result[2]
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
--- The xpcall() error handler for handleCall(). Modifies the error object
|
|
-- to include a structured backtrace
|
|
--
|
|
-- @param err The error object
|
|
-- @return The new error object
|
|
function MWServer:attachTrace( err )
|
|
return {
|
|
value = err,
|
|
trace = self:getStructuredTrace( 2 )
|
|
}
|
|
end
|
|
|
|
--- Handle a "loadString" message from PHP.
|
|
-- Load the function and return a chunk ID.
|
|
--
|
|
-- @param message The message from PHP
|
|
-- @return A response message to send back to PHP
|
|
function MWServer:handleLoadString( message )
|
|
if string.find( message.text, '\27Lua', 1, true ) then
|
|
return {
|
|
op = 'error',
|
|
value = 'cannot load code with a Lua binary chunk marker escape sequence in it'
|
|
}
|
|
end
|
|
local chunk, errorMsg = loadstring( message.text, message.chunkName )
|
|
if chunk then
|
|
setfenv( chunk, self.baseEnv )
|
|
local id = self:addChunk( chunk )
|
|
return {
|
|
op = 'return',
|
|
nvalues = 1,
|
|
values = {id}
|
|
}
|
|
else
|
|
return {
|
|
op = 'error',
|
|
value = errorMsg
|
|
}
|
|
end
|
|
end
|
|
|
|
--- Add a function value to the list of tracked chunks and return its associated ID.
|
|
-- Adding a chunk allows it to be referred to in messages from PHP.
|
|
--
|
|
-- @param chunk The function value
|
|
-- @return The chunk ID
|
|
function MWServer:addChunk( chunk )
|
|
local id = self.nextChunkId
|
|
self.nextChunkId = id + 1
|
|
self.chunks[id] = chunk
|
|
self.xchunks[chunk] = id
|
|
return id
|
|
end
|
|
|
|
--- Handle a "cleanupChunks" message from PHP.
|
|
-- Remove any chunks no longer referenced by PHP code.
|
|
--
|
|
-- @param message The message from PHP
|
|
-- @return A response message to send back to PHP
|
|
function MWServer:handleCleanupChunks( message )
|
|
for id, chunk in pairs( self.chunks ) do
|
|
if not message.ids[id] then
|
|
self.chunks[id] = nil
|
|
self.xchunks[chunk] = nil
|
|
end
|
|
end
|
|
|
|
return {
|
|
op = 'return',
|
|
nvalues = 0,
|
|
values = {}
|
|
}
|
|
end
|
|
|
|
--- Handle a "registerLibrary" message from PHP.
|
|
-- Add the relevant functions to the base environment.
|
|
--
|
|
-- @param message The message from PHP
|
|
-- @return The response message
|
|
function MWServer:handleRegisterLibrary( message )
|
|
local startPos = 1
|
|
local component
|
|
if not self.baseEnv[message.name] then
|
|
self.baseEnv[message.name] = {}
|
|
end
|
|
local t = self.baseEnv[message.name]
|
|
|
|
for name, id in pairs( message.functions ) do
|
|
t[name] = function( ... )
|
|
return self:call( id, self:listToCountAndTable( ... ) )
|
|
end
|
|
-- Protect the function against setfenv()
|
|
self.protectedFunctions[t[name]] = true
|
|
end
|
|
|
|
return {
|
|
op = 'return',
|
|
nvalues = 0,
|
|
values = {}
|
|
}
|
|
end
|
|
|
|
--- Handle a "wrapPhpFunction" message from PHP.
|
|
-- Create an anonymous function
|
|
--
|
|
-- @param message The message from PHP
|
|
-- @return The response message
|
|
function MWServer:handleWrapPhpFunction( message )
|
|
local id = message.id
|
|
local func = function( ... )
|
|
return self:call( id, self:listToCountAndTable( ... ) )
|
|
end
|
|
-- Protect the function against setfenv()
|
|
self.protectedFunctions[func] = true
|
|
|
|
return {
|
|
op = 'return',
|
|
nvalues = 1,
|
|
values = { func }
|
|
}
|
|
end
|
|
|
|
--- Handle a "getStatus" message from PHP
|
|
--
|
|
-- @param message The request message
|
|
-- @return The response message
|
|
function MWServer:handleGetStatus( message )
|
|
local nullRet = {
|
|
op = 'return',
|
|
nvalues = 0,
|
|
values = {}
|
|
}
|
|
local file = io.open( '/proc/self/stat' )
|
|
if not file then
|
|
return nullRet
|
|
end
|
|
local s = file:read('*a')
|
|
file:close()
|
|
local t = {}
|
|
for token in string.gmatch(s, '[^ ]+') do
|
|
t[#t + 1] = token
|
|
end
|
|
if #t < 22 then
|
|
return nullRet
|
|
end
|
|
return {
|
|
op = 'return',
|
|
nvalues = 1,
|
|
values = {{
|
|
pid = tonumber(t[1]),
|
|
time = tonumber(t[14]) + tonumber(t[15]) + tonumber(t[16]) + tonumber(t[17]),
|
|
vsize = tonumber(t[23]),
|
|
}}
|
|
}
|
|
end
|
|
|
|
--- The main request/response loop
|
|
--
|
|
-- Send a request message and return its matching reply message. Handle any
|
|
-- intervening requests (i.e. re-entrant calls) by dispatching them to the
|
|
-- relevant handler function.
|
|
--
|
|
-- The request message may optionally be omitted, to listen for request messages
|
|
-- without first sending a request of its own. Such a dispatch() call will
|
|
-- continue running until termination is requested by PHP. Typically, PHP does
|
|
-- this with a SIGTERM signal.
|
|
--
|
|
-- @param msgToPhp The message to send to PHP. Optional.
|
|
-- @return The matching response message
|
|
function MWServer:dispatch( msgToPhp )
|
|
if msgToPhp then
|
|
self:sendMessage( msgToPhp )
|
|
end
|
|
while true do
|
|
local msgFromPhp = self:receiveMessage()
|
|
local msgToPhp
|
|
local op = msgFromPhp.op
|
|
if op == 'return' or op == 'error' then
|
|
return msgFromPhp
|
|
elseif op == 'call' then
|
|
msgToPhp = self:handleCall( msgFromPhp )
|
|
self:sendMessage( msgToPhp )
|
|
elseif op == 'loadString' then
|
|
msgToPhp = self:handleLoadString( msgFromPhp )
|
|
self:sendMessage( msgToPhp )
|
|
elseif op == 'registerLibrary' then
|
|
msgToPhp = self:handleRegisterLibrary( msgFromPhp )
|
|
self:sendMessage( msgToPhp )
|
|
elseif op == 'wrapPhpFunction' then
|
|
msgToPhp = self:handleWrapPhpFunction( msgFromPhp )
|
|
self:sendMessage( msgToPhp )
|
|
elseif op == 'cleanupChunks' then
|
|
msgToPhp = self:handleCleanupChunks( msgFromPhp )
|
|
self:sendMessage( msgToPhp )
|
|
elseif op == 'getStatus' then
|
|
msgToPhp = self:handleGetStatus( msgFromPhp )
|
|
self:sendMessage( msgToPhp )
|
|
elseif op == 'quit' then
|
|
self:debug( 'MWServer:dispatch: quit message received' )
|
|
os.exit(0)
|
|
else
|
|
self:internalError( "Invalid message operation" )
|
|
end
|
|
end
|
|
end
|
|
|
|
--- Write a message to the debug output stream.
|
|
-- Some day this may be configurable, currently it just unconditionally writes
|
|
-- the message to stderr. The PHP host will redirect those errors to /dev/null
|
|
-- by default, but it can be configured to send them to a file.
|
|
--
|
|
-- @param s The message
|
|
function MWServer:debug( s )
|
|
if ( type(s) == 'string' ) then
|
|
io.stderr:write( s .. '\n' )
|
|
else
|
|
io.stderr:write( self:serialize( s ) .. '\n' )
|
|
end
|
|
end
|
|
|
|
--- Raise an internal error
|
|
-- Write a message to stderr and then exit with a failure status. This should
|
|
-- be called for errors which cannot be allowed to be caught with pcall().
|
|
--
|
|
-- This must be used for protocol errors, or indeed any error from a context
|
|
-- where a dispatch() call lies between the error source and a possible pcall()
|
|
-- handler. If dispatch() were terminated by a regular error() call, the
|
|
-- resulting protocol violation could lead to a deadlock.
|
|
--
|
|
-- @param msg The error message
|
|
function MWServer:internalError( msg )
|
|
io.stderr:write( debug.traceback( msg ) .. '\n' )
|
|
os.exit( 1 )
|
|
end
|
|
|
|
--- Raise an I/O error
|
|
-- Helper function for errors from the io and file modules, which may optionally
|
|
-- return an informative error message as their second return value.
|
|
function MWServer:ioError( header, info )
|
|
if type( info) == 'string' then
|
|
self:internalError( header .. ': ' .. info )
|
|
else
|
|
self:internalError( header )
|
|
end
|
|
end
|
|
|
|
--- Send a message to PHP
|
|
-- @param msg The message table
|
|
function MWServer:sendMessage( msg )
|
|
if not msg.op then
|
|
self:internalError( "MWServer:sendMessage: invalid message", 2 )
|
|
end
|
|
self:debug('TX ==> ' .. msg.op)
|
|
local encMsg = self:encodeMessage( msg )
|
|
local success, errorMsg = io.stdout:write( encMsg )
|
|
if not success then
|
|
self:ioError( 'Write error', errorMsg )
|
|
end
|
|
io.stdout:flush()
|
|
end
|
|
|
|
--- Wait for a message from PHP and then decode and return it as a table
|
|
-- @return The received message
|
|
function MWServer:receiveMessage()
|
|
-- Read the header
|
|
local header, errorMsg = io.stdin:read( 16 )
|
|
if header == nil and errorMsg == nil then
|
|
-- End of file on stdin, exit gracefully
|
|
os.exit(0)
|
|
end
|
|
|
|
if not header or #header ~= 16 then
|
|
self:ioError( 'Read error', errorMsg )
|
|
end
|
|
local length = self:decodeHeader( header )
|
|
|
|
-- Read the body
|
|
local body, errorMsg = io.stdin:read( length )
|
|
if not body then
|
|
self:ioError( 'Read error', errorMsg )
|
|
end
|
|
if #body ~= length then
|
|
self:ioError( 'Read error', errorMsg )
|
|
end
|
|
|
|
-- Unserialize it
|
|
msg = self:unserialize( body )
|
|
self:debug('RX <== ' .. msg.op)
|
|
if msg.op == 'error' then
|
|
self:debug( 'Error: ' .. tostring( msg.value ) )
|
|
end
|
|
return msg
|
|
end
|
|
|
|
--- Encode a message for sending to PHP
|
|
function MWServer:encodeMessage( message )
|
|
local serialized = self:serialize( message )
|
|
local length = #serialized
|
|
local check = length * 2 - 1
|
|
return string.format( '%08x%08x', length, check ) .. serialized
|
|
end
|
|
|
|
-- Faster to create the table once than for each call to MWServer:serialize()
|
|
local serialize_replacements = {
|
|
['\r'] = '\\r',
|
|
['\n'] = '\\n',
|
|
['\\'] = '\\\\',
|
|
}
|
|
|
|
--- Convert a value to a string suitable for passing to PHP's unserialize().
|
|
-- Note that the following replacements must be performed before calling
|
|
-- unserialize:
|
|
-- "\\r" => "\r"
|
|
-- "\\n" => "\n"
|
|
-- "\\\\" => "\\"
|
|
--
|
|
-- @param var The value.
|
|
function MWServer:serialize( var )
|
|
local done = {}
|
|
local int_min = -2147483648
|
|
local int_max = 2147483647
|
|
|
|
local function isInteger( var )
|
|
return type(var) == 'number'
|
|
and math.floor( var ) == var
|
|
and var >= int_min
|
|
and var <= int_max
|
|
end
|
|
|
|
local function recursiveEncode( var, level )
|
|
local t = type( var )
|
|
if t == 'nil' then
|
|
return 'N;'
|
|
elseif t == 'number' then
|
|
if isInteger(var) then
|
|
return 'i:' .. var .. ';'
|
|
elseif var < math.huge and var > -math.huge then
|
|
return 'd:' .. var .. ';'
|
|
elseif var == math.huge then
|
|
return 'd:INF;'
|
|
elseif var == -math.huge then
|
|
return 'd:-INF;'
|
|
else
|
|
return 'd:NAN;'
|
|
end
|
|
elseif t == 'string' then
|
|
return 's:' .. string.len( var ) .. ':"' .. var .. '";'
|
|
elseif t == 'boolean' then
|
|
if var then
|
|
return 'b:1;'
|
|
else
|
|
return 'b:0;'
|
|
end
|
|
elseif t == 'table' then
|
|
if done[var] then
|
|
error("Cannot pass circular reference to PHP")
|
|
end
|
|
done[var] = true
|
|
local buf = { '' }
|
|
local numElements = 0
|
|
for key, value in pairs(var) do
|
|
local t = type( key )
|
|
if t == 'number' or t == 'string' then
|
|
if (isInteger(key)) then
|
|
buf[#buf + 1] = 'i:' .. key .. ';'
|
|
else
|
|
buf[#buf + 1] = recursiveEncode( tostring( key ), level + 1 )
|
|
end
|
|
else
|
|
error("Cannot use " .. type( key ) .. " as an array key when passing data from Lua to PHP");
|
|
end
|
|
buf[#buf + 1] = recursiveEncode( value, level + 1 )
|
|
numElements = numElements + 1
|
|
end
|
|
buf[1] = 'a:' .. numElements .. ':{'
|
|
buf[#buf + 1] = '}'
|
|
return table.concat(buf)
|
|
elseif t == 'function' then
|
|
local id
|
|
if self.xchunks[var] then
|
|
id = self.xchunks[var]
|
|
else
|
|
id = self:addChunk(var)
|
|
end
|
|
return 'O:42:"Scribunto_LuaStandaloneInterpreterFunction":2:{s:13:"interpreterId";i:' ..
|
|
self.interpreterId .. ';s:2:"id";i:' .. id .. ';}'
|
|
elseif t == 'thread' then
|
|
error("Cannot pass thread to PHP")
|
|
elseif t == 'userdata' then
|
|
error("Cannot pass userdata to PHP")
|
|
else
|
|
error("Cannot pass unrecognised type to PHP")
|
|
end
|
|
end
|
|
|
|
return recursiveEncode( var, 0 ):gsub( '[\r\n\\]', serialize_replacements )
|
|
end
|
|
|
|
--- Convert a Lua expression string to its corresponding value.
|
|
-- Convert any references of the form chunk[id] to the corresponding function
|
|
-- values.
|
|
function MWServer:unserialize( text )
|
|
local func = loadstring( 'return ' .. text )
|
|
if not func then
|
|
self:internalError( "MWServer:unserialize: invalid chunk" )
|
|
end
|
|
-- Don't waste JIT cache space by storing every message in it
|
|
if jit then
|
|
jit.off( func )
|
|
end
|
|
setfenv( func, { chunks = self.chunks } )
|
|
return func()
|
|
end
|
|
|
|
--- Decode a message header.
|
|
-- @param header The header string
|
|
-- @return The body length
|
|
function MWServer:decodeHeader( header )
|
|
local length = string.sub( header, 1, 8 )
|
|
local check = string.sub( header, 9, 16 )
|
|
if not string.match( length, '^%x+$' ) or not string.match( check, '^%x+$' ) then
|
|
self:internalError( "Error decoding message header: " .. length .. '/' .. check )
|
|
end
|
|
length = tonumber( length, 16 )
|
|
check = tonumber( check, 16 )
|
|
if length * 2 - 1 ~= check then
|
|
self:internalError( "Error decoding message header" )
|
|
end
|
|
return length
|
|
end
|
|
|
|
--- Get a traceback similar to the one from debug.traceback(), but as a table
|
|
-- rather than formatted as a string
|
|
--
|
|
-- @param The level to start at: 1 for the function that called getStructuredTrace()
|
|
-- @return A table with the backtrace information
|
|
function MWServer:getStructuredTrace( level )
|
|
level = level + 1
|
|
local trace = {}
|
|
while true do
|
|
local rawInfo = debug.getinfo( level, 'nSl' )
|
|
if rawInfo == nil then
|
|
break
|
|
end
|
|
local info = {}
|
|
for i, key in ipairs({'short_src', 'what', 'currentline', 'name', 'namewhat', 'linedefined'}) do
|
|
info[key] = rawInfo[key]
|
|
end
|
|
if string.match( info['short_src'], '/MWServer.lua$' ) then
|
|
info['short_src'] = 'MWServer.lua'
|
|
end
|
|
if string.match( rawInfo['short_src'], '/mw_main.lua$' ) then
|
|
info['short_src'] = 'mw_main.lua'
|
|
end
|
|
table.insert( trace, info )
|
|
level = level + 1
|
|
end
|
|
return trace
|
|
end
|
|
|
|
--- Create a table to be used as a restricted environment, based on the current
|
|
-- global environment.
|
|
--
|
|
-- @return The environment table
|
|
function MWServer:newEnvironment()
|
|
local allowedGlobals = {
|
|
-- base
|
|
"assert",
|
|
"error",
|
|
"getmetatable",
|
|
"ipairs",
|
|
"next",
|
|
"pairs",
|
|
"pcall",
|
|
"rawequal",
|
|
"rawget",
|
|
"rawset",
|
|
"select",
|
|
"setmetatable",
|
|
"tonumber",
|
|
"type",
|
|
"unpack",
|
|
"xpcall",
|
|
"_VERSION",
|
|
-- libs
|
|
"table",
|
|
"math"
|
|
}
|
|
|
|
local env = {}
|
|
for i = 1, #allowedGlobals do
|
|
env[allowedGlobals[i]] = mw.clone( _G[allowedGlobals[i]] )
|
|
end
|
|
|
|
-- Cloning 'string' doesn't work right, because strings still use the old
|
|
-- 'string' as the metatable. So just copy it.
|
|
env.string = string
|
|
|
|
env._G = env
|
|
env.tostring = function( val )
|
|
return self:tostring( val )
|
|
end
|
|
env.string.dump = nil
|
|
env.setfenv, env.getfenv = mw.makeProtectedEnvFuncs(
|
|
self.protectedEnvironments, self.protectedFunctions )
|
|
env.debug = {
|
|
traceback = debug.traceback
|
|
}
|
|
env.os = {
|
|
date = os.date,
|
|
difftime = os.difftime,
|
|
time = os.time,
|
|
clock = os.clock
|
|
}
|
|
return env
|
|
end
|
|
|
|
--- An implementation of tostring() which does not expose pointers.
|
|
function MWServer:tostring(val)
|
|
local mt = getmetatable( val )
|
|
if mt and mt.__tostring then
|
|
return mt.__tostring(val)
|
|
end
|
|
local typeName = type(val)
|
|
local nonPointerTypes = {number = true, string = true, boolean = true, ['nil'] = true}
|
|
if nonPointerTypes[typeName] then
|
|
return tostring(val)
|
|
else
|
|
return typeName
|
|
end
|
|
end
|
|
|
|
return MWServer
|