<?php
/**
* Wikitext scripting infrastructure for MediaWiki: hooks.
* Copyright (C) 2009-2012 Victor Vasiliev <vasilvv@gmail.com>
* https://www.mediawiki.org/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
// phpcs:disable MediaWiki.NamingConventions.LowerCamelFunctionsName.FunctionName
namespace MediaWiki\Extension\Scribunto;
use Article;
use Content;
use EmptyBagOStuff;
use IContextSource;
use MediaWiki\Config\Config;
use MediaWiki\EditPage\EditPage;
use MediaWiki\Hook\EditFilterMergedContentHook;
use MediaWiki\Hook\EditPage__showReadOnlyForm_initialHook;
use MediaWiki\Hook\EditPage__showStandardInputs_optionsHook;
use MediaWiki\Hook\EditPageBeforeEditButtonsHook;
use MediaWiki\Hook\ParserClearStateHook;
use MediaWiki\Hook\ParserClonedHook;
use MediaWiki\Hook\ParserFirstCallInitHook;
use MediaWiki\Hook\ParserLimitReportFormatHook;
use MediaWiki\Hook\ParserLimitReportPrepareHook;
use MediaWiki\Hook\SoftwareInfoHook;
use MediaWiki\Html\Html;
use MediaWiki\MediaWikiServices;
use MediaWiki\Output\OutputPage;
use MediaWiki\Page\Hook\ArticleViewHeaderHook;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Revision\Hook\ContentHandlerDefaultModelForHook;
use MediaWiki\Status\Status;
use MediaWiki\Title\Title;
use MediaWiki\User\User;
use MediaWiki\WikiMap\WikiMap;
use ObjectCache;
use Parser;
use PPFrame;
use UtfNormal\Validator;
use Wikimedia\PSquare;
use Xml;
/**
 * Hooks for the Scribunto extension.
 */
class Hooks implements
SoftwareInfoHook,
ParserFirstCallInitHook,
ParserLimitReportPrepareHook,
ParserLimitReportFormatHook,
ParserClearStateHook,
ParserClonedHook,
EditPage__showStandardInputs_optionsHook,
EditPage__showReadOnlyForm_initialHook,
EditPageBeforeEditButtonsHook,
EditFilterMergedContentHook,
ArticleViewHeaderHook,
ContentHandlerDefaultModelForHook
{
private Config $config;
/**
 * @param Config $config Configuration source; invokeHook() and reportTiming()
 *  read the function-statistics settings from it
 */
public function __construct( Config $config ) {
	$this->config = $config;
}
/**
 * Define the Scribunto content model constant upon extension registration.
 *
 * The definition is guarded so that running this callback more than once in
 * the same process does not raise PHP's "constant already defined" warning.
 */
public static function onRegistration() {
	if ( !defined( 'CONTENT_MODEL_SCRIBUNTO' ) ) {
		define( 'CONTENT_MODEL_SCRIBUNTO', 'Scribunto' );
	}
}
/**
 * Add the scripting engine's software information to Special:Version.
 *
 * @param array &$software Software list, handed to the engine's getSoftwareInfo()
 * @return bool Always true
 */
public function onSoftwareInfo( &$software ) {
	$defaultEngine = Scribunto::newDefaultEngine();
	$versionPage = Title::makeTitle( NS_SPECIAL, 'Version' );
	$defaultEngine->setTitle( $versionPage );
	$defaultEngine->getSoftwareInfo( $software );

	return true;
}
/**
 * Register the {{#invoke:}} parser function.
 *
 * @param Parser $parser Parser on which the function hook is installed
 * @return bool Always true
 */
public function onParserFirstCallInit( $parser ) {
	// With SFH_OBJECT_ARGS, invokeHook() is given the frame and the argument
	// nodes, which it expands itself.
	$callback = [ $this, 'invokeHook' ];
	$parser->setFunctionHook( 'invoke', $callback, Parser::SFH_OBJECT_ARGS );

	return true;
}
/**
 * Called when the interpreter is to be reset.
 *
 * Delegates to Scribunto::resetParserEngine() for the given parser.
 *
 * @param Parser $parser Parser whose Scribunto engine is reset
 * @return bool Always true
 */
public function onParserClearState( $parser ) {
	Scribunto::resetParserEngine( $parser );
	return true;
}
/**
 * Called when the parser is cloned.
 *
 * @param Parser $parser The parser passed to the hook; its scribunto_engine
 *  property is cleared
 * @return bool Always true
 */
public function onParserCloned( $parser ) {
	// NOTE(review): presumably this keeps the clone from sharing the engine
	// instance of the parser it was cloned from - confirm against
	// Scribunto::getParserEngine().
	$parser->scribunto_engine = null;

	return true;
}
/**
 * Hook function for {{#invoke:module|func}}
 *
 * The first argument names the module and the second names the function to
 * call; the remaining arguments are passed to the function through a child
 * frame. When a ScribuntoException is thrown, the detailed error HTML is
 * stored in the parser output and a short error element is returned instead.
 *
 * @param Parser $parser
 * @param PPFrame $frame
 * @param array $args Unexpanded argument nodes; index 0 is the module name,
 *  index 1 the function name
 * @return string Cleaned-up function result, or an #iferror-compatible error element
 */
public function invokeHook( Parser $parser, PPFrame $frame, array $args ) {
	try {
		if ( count( $args ) < 2 ) {
			throw new ScribuntoException( 'scribunto-common-nofunction' );
		}
		$moduleName = trim( $frame->expand( $args[0] ) );
		$engine = Scribunto::getParserEngine( $parser );

		$title = Title::makeTitleSafe( NS_MODULE, $moduleName );
		if ( !$title || !$title->hasContentModel( CONTENT_MODEL_SCRIBUNTO ) ) {
			throw new ScribuntoException( 'scribunto-common-nosuchmodule',
				[ 'args' => [ $moduleName ] ] );
		}
		$module = $engine->fetchModuleFromParser( $title );
		if ( !$module ) {
			throw new ScribuntoException( 'scribunto-common-nosuchmodule',
				[ 'args' => [ $moduleName ] ] );
		}
		$functionName = trim( $frame->expand( $args[1] ) );

		$bits = $args[1]->splitArg();
		unset( $args[0] );
		unset( $args[1] );

		// If $bits['index'] is empty, then the function name was parsed as a
		// key=value pair (because of an equals sign in it), and since it didn't
		// have an index, we don't need the index offset.
		$childFrame = $frame->newChild( $args, $title, $bits['index'] === '' ? 0 : 1 );

		if ( $this->config->get( 'ScribuntoGatherFunctionStats' ) ) {
			// Measure the CPU time consumed by this single invocation.
			$u0 = $engine->getResourceUsage( $engine::CPU_SECONDS );
			$result = $module->invoke( $functionName, $childFrame );
			$u1 = $engine->getResourceUsage( $engine::CPU_SECONDS );

			if ( $u1 > $u0 ) {
				$timingMs = (int)( 1000 * ( $u1 - $u0 ) );
				// Since the overhead of stats is worst when #invoke
				// calls are very short, don't process measurements <= 20ms.
				if ( $timingMs > 20 ) {
					$this->reportTiming( $moduleName, $functionName, $timingMs );
				}
			}
		} else {
			$result = $module->invoke( $functionName, $childFrame );
		}

		return Validator::cleanUp( strval( $result ) );
	} catch ( ScribuntoException $e ) {
		$trace = $e->getScriptTraceHtml( [ 'msgOptions' => [ 'content' ] ] );
		$html = Html::element( 'p', [], $e->getMessage() );
		if ( $trace !== false ) {
			$html .= Html::element( 'p',
				[],
				wfMessage( 'scribunto-common-backtrace' )->inContentLanguage()->text()
			) . $trace;
		} else {
			$html .= Html::element( 'p',
				[],
				wfMessage( 'scribunto-common-no-details' )->inContentLanguage()->text()
			);
		}

		// Index this error by a uniq ID so that we are independent of
		// page parse order. (T300979)
		// (The only way this will conflict is if two exceptions have
		// exactly the same backtrace, in which case we really only need
		// one copy of the backtrace!)
		$uuid = substr( sha1( $html ), -8 );
		$parserOutput = $parser->getOutput();
		$parserOutput->appendExtensionData( 'ScribuntoErrors', $uuid );
		$parserOutput->setExtensionData( "ScribuntoErrors-$uuid", $html );
		$parserOutput->appendJsConfigVar( 'ScribuntoErrors', $uuid );
		$parserOutput->setJsConfigVar( "ScribuntoErrors-$uuid", $html );

		// These methods are idempotent; doesn't hurt to call them every
		// time.
		$parser->addTrackingCategory( 'scribunto-common-error-category' );
		$parserOutput->addModules( [ 'ext.scribunto.errors' ] );

		$id = "mw-scribunto-error-$uuid";
		$parserError = htmlspecialchars( $e->getMessage() );

		// #iferror-compatible error element
		return "<strong class=\"error\"><span class=\"scribunto-error\" id=\"$id\">" .
			$parserError . "</span></strong>";
	}
}
/**
 * Record stats on slow function calls.
 *
 * Each timing is added to a percentile estimator kept in the local server
 * cache. Once at least 1000 observations have been collected, a timing that
 * is not below the current estimate is sent to the stats backend.
 *
 * @param string $moduleName
 * @param string $functionName
 * @param int $timing Function execution time in milliseconds.
 */
private function reportTiming( $moduleName, $functionName, $timing ) {
	if ( !$this->config->get( 'ScribuntoGatherFunctionStats' ) ) {
		return;
	}

	// Only a float strictly between 0 and 1 is accepted as a threshold.
	$threshold = $this->config->get( 'ScribuntoSlowFunctionThreshold' );
	if ( !( is_float( $threshold ) && $threshold > 0 && $threshold < 1 ) ) {
		return;
	}

	static $cache;
	if ( !$cache ) {
		$cache = ObjectCache::getLocalServerInstance( CACHE_NONE );
	}

	// To control the sampling rate, we keep a compact histogram of
	// observations in APC, and extract the Nth percentile (specified
	// via $wgScribuntoSlowFunctionThreshold; defaults to 0.90).
	// We need a non-empty local server cache for that (e.g. php-apcu).
	if ( $cache instanceof EmptyBagOStuff ) {
		return;
	}

	// Part of the cache key, so changing it abandons previously stored estimators.
	$cacheVersion = '3';
	$key = $cache->makeGlobalKey( 'scribunto-stats', $cacheVersion, (string)$threshold );

	// This is a classic "read-update-write" critical section with no
	// mutual exclusion, but the only consequence is that some samples
	// will be dropped. We only need enough samples to estimate the
	// shape of the data, so that's fine.
	$ps = $cache->get( $key ) ?: new PSquare( $threshold );
	$ps->addObservation( $timing );
	$cache->set( $key, $ps, 60 );

	if ( $ps->getCount() < 1000 || $timing < $ps->getValue() ) {
		return;
	}

	static $stats;
	if ( !$stats ) {
		$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
	}

	$metricKey = sprintf( 'scribunto.traces.%s__%s__%s', WikiMap::getCurrentWikiId(), $moduleName, $functionName );
	$stats->timing( $metricKey, $timing );
}
/**
 * Choose the default content model for pages in the Module namespace.
 *
 * Titles ending in ".json" get the JSON model; other titles that are not
 * documentation pages get the Scribunto model. A model that is already
 * 'sanitized-css' is left untouched, as are titles outside the namespace.
 *
 * @param Title $title
 * @param string &$model
 * @return bool Always true
 */
public function onContentHandlerDefaultModelFor( $title, &$model ) {
	// Let TemplateStyles override Scribunto
	$claimedByTemplateStyles = $model === 'sanitized-css';
	if ( $claimedByTemplateStyles || $title->getNamespace() !== NS_MODULE ) {
		return true;
	}

	if ( str_ends_with( $title->getText(), '.json' ) ) {
		$model = CONTENT_MODEL_JSON;
		return true;
	}

	if ( !Scribunto::isDocPage( $title ) ) {
		$model = CONTENT_MODEL_SCRIBUNTO;
	}
	return true;
}
/**
 * Let the parser's Scribunto engine, if one is present, add its limit
 * report data to the parser output.
 *
 * @param Parser $parser
 * @param ParserOutput $parserOutput
 * @return bool Always true
 */
public function onParserLimitReportPrepare( $parser, $parserOutput ) {
	// Skip when isParserEnginePresent() reports no engine for this parser.
	if ( !Scribunto::isParserEnginePresent( $parser ) ) {
		return true;
	}

	Scribunto::getParserEngine( $parser )->reportLimitData( $parserOutput );
	return true;
}
/**
 * Format one entry of the limit report by delegating to a default engine.
 *
 * @param string $key
 * @param mixed &$value
 * @param string &$report
 * @param bool $isHTML
 * @param bool $localize
 * @return bool Whatever the engine's formatLimitData() returns
 */
public function onParserLimitReportFormat( $key, &$value, &$report, $isHTML, $localize ) {
	return Scribunto::newDefaultEngine()
		->formatLimitData( $key, $value, $report, $isHTML, $localize );
}
/**
 * EditPage::showStandardInputs:options hook
 *
 * For pages with the Scribunto content model, loads the edit module and
 * appends the console container after the edit tools.
 *
 * @param EditPage $editor
 * @param OutputPage $output
 * @param int &$tab Current tabindex
 * @return bool Always true
 */
public function onEditPage__showStandardInputs_options( $editor, $output, &$tab ) {
	$isModulePage = $editor->getTitle()->hasContentModel( CONTENT_MODEL_SCRIBUNTO );
	if ( !$isModulePage ) {
		return true;
	}

	$output->addModules( 'ext.scribunto.edit' );
	$editor->editFormTextAfterTools .= '<div id="mw-scribunto-console"></div>';
	return true;
}
/**
 * EditPage::showReadOnlyForm:initial hook
 *
 * For pages with the Scribunto content model, loads the edit module and
 * appends the console container after the page content.
 *
 * @param EditPage $editor
 * @param OutputPage $output
 * @return bool Always true
 */
public function onEditPage__showReadOnlyForm_initial( $editor, $output ) {
	$isModulePage = $editor->getTitle()->hasContentModel( CONTENT_MODEL_SCRIBUNTO );
	if ( !$isModulePage ) {
		return true;
	}

	$output->addModules( 'ext.scribunto.edit' );
	$editor->editFormTextAfterContent .= '<div id="mw-scribunto-console"></div>';
	return true;
}
/**
 * EditPageBeforeEditButtons hook
 *
 * Removes the 'preview' button when the edited page has the Scribunto
 * content model.
 *
 * @param EditPage $editor
 * @param array &$buttons Button array
 * @param int &$tabindex Current tabindex
 * @return bool Always true
 */
public function onEditPageBeforeEditButtons( $editor, &$buttons, &$tabindex ) {
	$isModulePage = $editor->getTitle()->hasContentModel( CONTENT_MODEL_SCRIBUNTO );
	if ( !$isModulePage ) {
		return true;
	}

	unset( $buttons['preview'] );
	return true;
}
/**
 * Validate Scribunto content before an edit is saved.
 *
 * Content that is not ScribuntoContent is ignored. Otherwise the content
 * handler's validate() result is merged into $status when it is not OK, and
 * if the reported error is on a numeric line of the page being edited, an
 * inline script is added that points the location hash at that line.
 *
 * @param IContextSource $context
 * @param Content $content
 * @param Status $status
 * @param string $summary
 * @param User $user
 * @param bool $minoredit
 * @return bool False if validation left $status not OK, true otherwise
 */
public function onEditFilterMergedContent( IContextSource $context, Content $content,
	Status $status, $summary, User $user, $minoredit
) {
	$title = $context->getTitle();

	if ( !$content instanceof ScribuntoContent ) {
		return true;
	}

	$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$contentHandler = $contentHandlerFactory->getContentHandler( $content->getModel() );
	'@phan-var ScribuntoContentHandler $contentHandler';
	$validateStatus = $contentHandler->validate( $content, $title );
	if ( $validateStatus->isOK() ) {
		return true;
	}

	$status->merge( $validateStatus );

	// Require both keys before reading them: checking only 'module' would
	// let a missing 'line' raise an undefined-index warning and pass null
	// to preg_match().
	$params = $validateStatus->value->params ?? [];
	if ( isset( $params['module'], $params['line'] ) ) {
		$module = $params['module'];
		$line = $params['line'];
		if ( $module === $title->getPrefixedDBkey() && preg_match( '/^\d+$/', $line ) ) {
			$out = $context->getOutput();
			$out->addInlineScript( 'window.location.hash = ' . Xml::encodeJsVar( "#mw-ce-l$line" ) );
		}
	}
	if ( !$status->isOK() ) {
		// @todo Remove this line once this extension no longer supports MediaWiki 1.36 and earlier
		$status->value = EditPage::AS_HOOK_ERROR_EXPECTED;
		return false;
	}

	return true;
}
/**
 * Show a header on module documentation pages.
 *
 * @param Article $article
 * @param bool|ParserOutput|null &$outputDone
 * @param bool &$pcache
 * @return bool Always true
 */
public function onArticleViewHeader( $article, &$outputDone, &$pcache ) {
	$title = $article->getTitle();
	// $forModule is not assigned here; it is filled in by isDocPage() and
	// is only used when that call returns a truthy value.
	if ( Scribunto::isDocPage( $title, $forModule ) ) {
		$article->getContext()->getOutput()->addHTML(
			wfMessage( 'scribunto-doc-page-header', $forModule->getPrefixedText() )->parseAsBlock()
		);
	}
	return true;
}
}