mediawiki-extensions-Scribunto/includes/ScribuntoContentHandler.php
Timo Tijhof 61874ca30e ScribuntoContentHandler: Refactor fillParserOutput()
The ParserOutput object used here starts life as the ParserOutput for
parsing the docpage (wrapped via an interface message). In order to
remove use of the Title::getPageViewLanguage method there, we need to
re-arrange some logic such that we parse the doc page first, and see
what language it was rendered in, instead of the current approach, where
Title::getPageViewLanguage tries to "guess" what Parser and
LanguageConverter will do.

As prep for that, stop reading/writing the HTML text of this
ParserOutput object in favour of being more like the Parser itself,
which accumulates metadata in ParserOutput and calls setText only
once at the end.

* Refactor highlight() to return standalone HTML instead.
* Refactor validation error to append to $html instead.

Other improvements while at it:

* Document how stuff works today.
* Clarify variable names.
* Separate concerns better by moving responsibility of `<pre>`
  fallback to highlight(), and limiting knowledge of
  ScribuntoEngineBase to the caller.

Bug: T350806
Change-Id: I9fe6d93727f29c284ea21db6edd6a2b1663e8e06
2023-12-01 22:25:51 +00:00

244 lines
7.2 KiB
PHP

<?php
namespace MediaWiki\Extension\Scribunto;
use CodeContentHandler;
use Content;
use ExtensionRegistry;
use Html;
use MediaWiki\Content\Renderer\ContentParseParams;
use MediaWiki\Content\ValidationParams;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageIdentity;
use MediaWiki\SyntaxHighlight\SyntaxHighlight;
use MediaWiki\Title\Title;
use ParserOutput;
use Status;
use TextContent;
/**
* Scribunto Content Handler
*
* @file
* @ingroup Extensions
* @ingroup Scribunto
*
* @author Brad Jorsch <bjorsch@wikimedia.org>
*/
class ScribuntoContentHandler extends CodeContentHandler {
/**
 * Create the handler.
 *
 * Both arguments are passed through unchanged to the parent constructor.
 *
 * @param string $modelId Content model ID; defaults to CONTENT_MODEL_SCRIBUNTO
 * @param string[] $formats Formats handed to the parent; defaults to
 *   CONTENT_FORMAT_TEXT only
 */
public function __construct(
$modelId = CONTENT_MODEL_SCRIBUNTO, $formats = [ CONTENT_FORMAT_TEXT ]
) {
parent::__construct( $modelId, $formats );
}
/**
 * Get the name of the Content class used with this handler.
 *
 * @return string Class name, always ScribuntoContent::class
 */
protected function getContentClass() {
return ScribuntoContent::class;
}
/**
 * Check whether the given format is supported.
 *
 * The literal string 'CONTENT_FORMAT_TEXT' is mapped to the value of the
 * CONTENT_FORMAT_TEXT constant before asking the parent, because an error
 * in an earlier version of Scribunto means that string might be seen.
 *
 * @param string $format
 * @return bool
 */
public function isSupportedFormat( $format ) {
	$effectiveFormat = $format === 'CONTENT_FORMAT_TEXT' ? CONTENT_FORMAT_TEXT : $format;

	return parent::isSupportedFormat( $effectiveFormat );
}
/**
 * Restrict this content handler to pages in the Module namespace.
 *
 * @param Title $title
 * @return bool False for any title outside NS_MODULE; otherwise the
 *   parent's answer for that title
 */
public function canBeUsedOn( Title $title ) {
	return $title->getNamespace() === NS_MODULE
		? parent::canBeUsedOn( $title )
		: false;
}
/**
 * Always true for this handler.
 *
 * @inheritDoc
 */
public function supportsPreloadContent(): bool {
return true;
}
/**
 * Delegates to validate(), using the page carried by the validation params.
 *
 * @inheritDoc
 */
public function validateSave(
	Content $content,
	ValidationParams $validationParams
) {
	'@phan-var ScribuntoContent $content';
	$targetPage = $validationParams->getPageIdentity();

	return $this->validate( $content, $targetPage );
}
/**
 * Check whether the script is valid.
 *
 * A fresh default engine is created, bound to the page's Title, and asked
 * to validate the content's text under the page's prefixed DB key.
 *
 * @param TextContent $content Content whose text is validated
 * @param PageIdentity $page Page the script belongs to; converted to a
 *   Title first if it is not one already
 * @return Status Result reported by the engine
 */
public function validate( TextContent $content, PageIdentity $page ) {
	$title = $page instanceof Title
		? $page
		: MediaWikiServices::getInstance()->getTitleFactory()->castFromPageIdentity( $page );

	$scriptEngine = Scribunto::newDefaultEngine();
	// @phan-suppress-next-line PhanTypeMismatchArgument
	$scriptEngine->setTitle( $title );

	$scriptText = $content->getText();
	$pageKey = $title->getPrefixedDBkey();

	return $scriptEngine->validate( $scriptText, $pageKey );
}
/**
 * Build the ParserOutput for a Module page.
 *
 * The HTML is assembled from up to three parts, in this order:
 *  - the parsed documentation message (if there is one and it is not disabled),
 *  - a validation error box (if the script does not validate),
 *  - the highlighted source code.
 *
 * Metadata is accumulated on the ParserOutput along the way, and its text
 * is set exactly once at the end.
 *
 * @inheritDoc
 */
protected function fillParserOutput(
	Content $content,
	ContentParseParams $cpoParams,
	ParserOutput &$parserOutput
) {
	'@phan-var ScribuntoContent $content';
	$pageRef = $cpoParams->getPage();
	$moduleTitle = Title::castFromPageReference( $pageRef );
	$options = $cpoParams->getParserOptions();
	$revId = $cpoParams->getRevId();
	$wantHtml = $cpoParams->getGenerateHtml();
	$parser = MediaWikiServices::getInstance()->getParserFactory()->getInstance();
	$code = $content->getText();

	// Choose the interface message that wraps the doc page, or that reports
	// the doc page as missing. There is no message when there is no doc title.
	// @phan-suppress-next-line PhanTypeMismatchArgument
	$docTitle = Scribunto::getDocPage( $moduleTitle );
	$docMsg = null;
	if ( $docTitle ) {
		$docMsgKey = $docTitle->exists()
			? 'scribunto-doc-page-show'
			: 'scribunto-doc-page-does-not-exist';
		$docMsg = wfMessage( $docMsgKey, $docTitle->getPrefixedText() )->inContentLanguage();
	}

	// Start from an empty output; it is replaced by the doc page's
	// ParserOutput below when documentation is rendered.
	$parserOutput = new ParserOutput();
	$html = '';

	if ( $docMsg && !$docMsg->isDisabled() ) {
		// The doc page must be able to categorize the Module page, which
		// requires the ParserOutput produced by parsing it. That rules out
		// a plain $docMsg->parse().
		$docViewLang = $docTitle->getPageViewLanguage();
		$dir = $docViewLang->getDir();

		// The code itself is always ltr, but documentation may be rtl, so
		// the wrapper needs the matching direction class for correct
		// formatting: either mw-content-ltr or mw-content-rtl.
		$wrapperAttribs = [
			'lang' => $docViewLang->getHtmlCode(),
			'dir' => $dir,
			'class' => "mw-content-$dir",
		];
		// Surrounding line breaks isolate the wikitext so that it is parsed
		// correctly. See Bug 60664.
		$wrappedMessage = "\n" . $docMsg->plain() . "\n";
		$docWikitext = Html::rawElement( 'div', $wrapperAttribs, $wrappedMessage );

		if ( $options->getTargetLanguage() === null ) {
			$options->setTargetLanguage( $docTitle->getPageLanguage() );
		}

		$parserOutput = $parser->parse( $docWikitext, $pageRef, $options, true, true, $revId );
		$html .= $parserOutput->getRawText();
	}

	if ( $docTitle ) {
		// Record the doc page as transcluded, so that edits to the doc page
		// will purge this Module page.
		$docPageId = $docTitle->getArticleID();
		$docRevId = $docTitle->getLatestRevID();
		$parserOutput->addTemplate( $docTitle, $docPageId, $docRevId );
	}

	// Validate the script; an invalid one gets an error message and a
	// tracking category.
	// @phan-suppress-next-line PhanTypeMismatchArgument
	$status = $this->validate( $content, $moduleTitle );
	if ( !$status->isOK() ) {
		// FIXME: This uses a Status object, which in turn uses global RequestContext
		// to localize the message. This would poison the ParserCache.
		//
		// In practice this branch is almost unreachable: since 2014 there has
		// been no way to create a Module page with invalid content (it is
		// validated, and the action aborted, on edit, undelete, content-model
		// change etc.). See also T304381.
		$errorHtml = $status->getHTML( 'scribunto-error-short', 'scribunto-error-long' );
		$html .= Html::rawElement( 'div', [ 'class' => 'errorbox' ], $errorHtml );

		MediaWikiServices::getInstance()->getTrackingCategories()->addTrackingCategory(
			$parserOutput,
			'scribunto-module-with-errors-category',
			$pageRef
		);
	}

	if ( $wantHtml ) {
		$engine = Scribunto::newDefaultEngine();
		// @phan-suppress-next-line PhanTypeMismatchArgument
		$engine->setTitle( $moduleTitle );
		$codeLang = $engine->getGeSHiLanguage();
		$html .= $this->highlight( $code, $parserOutput, $codeLang );
	} else {
		// The doc page and validation steps above produce metadata and so
		// must always run. Syntax highlighting only matters for the HTML,
		// so it is skipped here and the text is left empty.
		$html = '';
	}

	$parserOutput->setText( $html );
}
/**
 * Get syntax highlighted code and add metadata to output.
 *
 * Highlighting is attempted only when the ScribuntoUseGeSHi setting is
 * enabled, a code language is given, and the SyntaxHighlight extension is
 * loaded. If any of these does not hold, or SyntaxHighlight does not return
 * a good status, this falls back to an escaped `<pre>` element.
 *
 * @param string $source Source code
 * @param ParserOutput $parserOutput Receives the Pygments style and script
 *   modules when highlighting succeeds; left untouched on fallback
 * @param string|false $codeLang
 * @return string HTML
 */
private function highlight( $source, ParserOutput $parserOutput, $codeLang ) {
	// Read the setting through the config service instead of a `global`
	// declaration, so the method has no hidden dependency on global scope.
	$useGeSHi = MediaWikiServices::getInstance()->getMainConfig()->get( 'ScribuntoUseGeSHi' );

	if (
		$useGeSHi && $codeLang && ExtensionRegistry::getInstance()->isLoaded( 'SyntaxHighlight' )
	) {
		$status = SyntaxHighlight::highlight( $source, $codeLang, [ 'line' => true, 'linelinks' => 'L' ] );
		if ( $status->isGood() ) {
			// @todo replace addModuleStyles line with the appropriate call on
			// SyntaxHighlight once one is created
			$parserOutput->addModuleStyles( [ 'ext.pygments' ] );
			$parserOutput->addModules( [ 'ext.pygments.linenumbers' ] );
			return $status->getValue();
		}
	}

	// Fallback: plain, HTML-escaped source in a left-to-right <pre> block.
	return "<pre class='mw-code mw-script' dir='ltr'>\n" .
		htmlspecialchars( $source ) .
		"\n</pre>\n";
}
}