mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/SyntaxHighlight_GeSHi
synced 2024-11-23 22:13:40 +00:00
Add Parsoid support for syntaxhighlight
* Added Parsoid config, and refactored code slightly to add native Parsoid handlers for parser tags exposed by this extension.
* Enabled parsoid mode testing on the test file.
* Added html/parsoid sections on a few tests.
* Marked the rest of the tests as wt2html and wt2wt only, since html2wt and html2html will fail without an html/parsoid section and there is no real benefit to adding them to all tests.
* Added a couple of tests to the known failures list:
  - One is because of T299103.
  - The other is because Parsoid always emits attributes in the form <tag .. foo="bar"..> instead of just <tag ... foo ..>. Since Parsoid needs to accept this format that is present on wikis, I added an html/parsoid section for this test and added the failures to the known failures list.

Bug: T272939
Change-Id: Ie30aa6b082d4fc43c73296ff2ed6cb8c3873f48f
This commit is contained in:
parent
3bee59df01
commit
0eef7add67
|
@ -102,6 +102,9 @@
|
|||
],
|
||||
"SoftwareInfo": "SyntaxHighlight::onSoftwareInfo"
|
||||
},
|
||||
"ParsoidModules": [
|
||||
"MediaWiki\\SyntaxHighlight\\ParsoidExt"
|
||||
],
|
||||
"attributes": {
|
||||
"SyntaxHighlight": {
|
||||
"Models": {}
|
||||
|
|
39
includes/ParsoidExt.php
Normal file
39
includes/ParsoidExt.php
Normal file
|
@ -0,0 +1,39 @@
|
|||
<?php
|
||||
|
||||
declare( strict_types = 1 );
|
||||
|
||||
namespace MediaWiki\SyntaxHighlight;
|
||||
|
||||
use Wikimedia\Parsoid\Ext\ExtensionModule;
|
||||
|
||||
/**
 * Parsoid extension module describing the tags this extension exposes.
 */
class ParsoidExt implements ExtensionModule {

	/** @inheritDoc */
	public function getConfig(): array {
		// Both tag names share one handler class and one set of options,
		// so the per-tag entries are built from a single template.
		$tagOptions = [
			// Strip nowiki markers from #tag parser-function arguments.
			// This will be used to resolve T299103.
			// This is primarily a b/c flag in Parsoid.
			'stripNowiki' => true
		];

		$tags = [];
		foreach ( [ 'source', 'syntaxhighlight' ] as $tagName ) {
			$tags[] = [
				'name' => $tagName,
				'handler' => SyntaxHighlight::class,
				'options' => $tagOptions
			];
		}

		return [
			'name' => 'SyntaxHighlight',
			'tags' => $tags
		];
	}
}
|
|
@ -34,7 +34,11 @@ use TextContent;
|
|||
use Title;
|
||||
use WANObjectCache;
|
||||
|
||||
class SyntaxHighlight {
|
||||
use Wikimedia\Parsoid\DOM\DocumentFragment;
|
||||
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
|
||||
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
|
||||
|
||||
class SyntaxHighlight extends ExtensionTagHandler {
|
||||
|
||||
/** @var int The maximum number of lines that may be selected for highlighting. */
|
||||
private const HIGHLIGHT_MAX_LINES = 1000;
|
||||
|
@ -116,6 +120,45 @@ class SyntaxHighlight {
|
|||
return self::parserHook( $text, $args, $parser );
|
||||
}
|
||||
|
||||
/**
 * Names of the style modules that accompany highlighted output.
 *
 * Kept in one place so every caller registers the same list.
 *
 * @return string[]
 */
private static function getModuleStyles(): array {
	$styleModules = [ 'ext.pygments' ];

	return $styleModules;
}
|
||||
|
||||
/**
 * Normalise the tag body and its deprecated attributes, run the
 * highlighter, and report the outcome together with the tracking
 * categories the caller should record.
 *
 * @param string $text Raw tag contents
 * @param array $args Tag attributes
 * @param ?Parser $parser Handed through to highlight(); may be null
 * @return array Map with 'html' (the value carried by the highlight()
 *  result) and 'cats' (list of tracking category keys)
 * @throws MWException
 */
private static function processContent( string $text, array $args, ?Parser $parser = null ): array {
	$cats = [];

	// "enclose" is deprecated: enclose="none" is expressed as the
	// "inline" flag, and any use of the attribute is tracked.
	if ( isset( $args['enclose'] ) ) {
		$encloseWasNone = $args['enclose'] === 'none';
		unset( $args['enclose'] );
		if ( $encloseWasNone ) {
			$args['inline'] = true;
		}
		$cats[] = 'syntaxhighlight-enclose-category';
	}

	// Trailing whitespace is dropped entirely; at the start only
	// linefeeds are removed so that leading indentation survives.
	$code = preg_replace( '/^\n+/', '', rtrim( $text ) );

	$status = self::highlight( $code, $args['lang'] ?? '', $args, $parser );
	if ( !$status->isGood() ) {
		$cats[] = 'syntaxhighlight-error-category';
	}

	return [ 'html' => $status->getValue(), 'cats' => $cats ];
}
|
||||
|
||||
/**
|
||||
* Parser hook for both <source> and <syntaxhighlight> logic
|
||||
*
|
||||
|
@ -129,32 +172,30 @@ class SyntaxHighlight {
|
|||
// Replace strip markers (For e.g. {{#tag:syntaxhighlight|<nowiki>...}})
|
||||
$out = $parser->getStripState()->unstripNoWiki( $text );
|
||||
|
||||
// Don't trim leading spaces away, just the linefeeds
|
||||
$out = preg_replace( '/^\n+/', '', rtrim( $out ) );
|
||||
|
||||
// Convert deprecated attributes
|
||||
if ( isset( $args['enclose'] ) ) {
|
||||
if ( $args['enclose'] === 'none' ) {
|
||||
$args['inline'] = true;
|
||||
}
|
||||
unset( $args['enclose'] );
|
||||
$parser->addTrackingCategory( 'syntaxhighlight-enclose-category' );
|
||||
$result = self::processContent( $out, $args, $parser );
|
||||
foreach ( $result['cats'] as $cat ) {
|
||||
$parser->addTrackingCategory( $cat );
|
||||
}
|
||||
|
||||
$lexer = $args['lang'] ?? '';
|
||||
|
||||
$result = self::highlight( $out, $lexer, $args, $parser );
|
||||
if ( !$result->isGood() ) {
|
||||
$parser->addTrackingCategory( 'syntaxhighlight-error-category' );
|
||||
}
|
||||
$out = $result->getValue();
|
||||
|
||||
// Register CSS
|
||||
// TODO: Consider moving to a separate method so that public method
|
||||
// highlight() can be used without needing to know the module name.
|
||||
$parser->getOutput()->addModuleStyles( [ 'ext.pygments' ] );
|
||||
$parser->getOutput()->addModuleStyles( self::getModuleStyles() );
|
||||
|
||||
return $out;
|
||||
return $result['html'];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
public function sourceToDom(
	ParsoidExtensionAPI $extApi, string $text, array $extArgs
): ?DocumentFragment {
	$args = $extApi->extArgsToArray( $extArgs );
	$processed = self::processContent( $text, $args );

	// FIXME: $processed['cats'] is dropped here because Parsoid offers
	// no API method for adding tracking categories.

	// Make sure the highlighting styles are registered with the output
	$extApi->addModuleStyles( self::getModuleStyles() );

	return $extApi->htmlToDom( $processed['html'] );
}
|
||||
|
||||
/**
|
||||
|
@ -529,7 +570,7 @@ class SyntaxHighlight {
|
|||
}
|
||||
$out = $status->getValue();
|
||||
|
||||
$parserOutput->addModuleStyles( [ 'ext.pygments' ] );
|
||||
$parserOutput->addModuleStyles( self::getModuleStyles() );
|
||||
$parserOutput->addModules( [ 'ext.pygments.linenumbers' ] );
|
||||
$parserOutput->setText( $out );
|
||||
|
||||
|
@ -566,7 +607,7 @@ class SyntaxHighlight {
|
|||
$out = '<pre' . $encodedAttrs . '>' . substr( $out, strlen( $m[0] ) );
|
||||
}
|
||||
$output = $context->getOutput();
|
||||
$output->addModuleStyles( 'ext.pygments' );
|
||||
$output->addModuleStyles( self::getModuleStyles() );
|
||||
$output->addHTML( '<div dir="ltr">' . $out . '</div>' );
|
||||
|
||||
// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
|
||||
|
|
26
tests/parser/parserTests-knownFailures.json
Normal file
26
tests/parser/parserTests-knownFailures.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"Enclose with nowiki": {
|
||||
"wt2html": "<p data-parsoid='{\"dsr\":[0,69,0,0]}'><code class=\"mw-highlight mw-highlight-lang-text mw-content-ltr\" dir=\"ltr\" typeof=\"mw:Extension/syntaxhighlight mw:Transclusion\" about=\"#mwt2\" data-parsoid='{\"pi\":[[{\"k\":\"1\"},{\"k\":\"lang\",\"named\":true},{\"k\":\"inline\",\"named\":true}]],\"dsr\":[0,69,null,null]}' data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"#tag:syntaxhighlight\",\"function\":\"tag\"},\"params\":{\"1\":{\"wt\":\"<nowiki>foo</nowiki>\"},\"lang\":{\"wt\":\"\\\"text\\\"\"},\"inline\":{\"wt\":\"none\"}},\"i\":0}}]}'><nowiki>foo</nowiki></code></p>",
|
||||
"html2html": "<p data-parsoid='{\"dsr\":[0,69,0,0]}'><code class=\"mw-highlight mw-highlight-lang-text mw-content-ltr\" dir=\"ltr\" typeof=\"mw:Extension/syntaxhighlight mw:Transclusion\" about=\"#mwt2\" data-parsoid='{\"pi\":[[{\"k\":\"1\"},{\"k\":\"lang\",\"named\":true},{\"k\":\"inline\",\"named\":true}]],\"dsr\":[0,69,null,null]}' data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"#tag:syntaxhighlight\",\"function\":\"tag\"},\"params\":{\"1\":{\"wt\":\"<nowiki>foo</nowiki>\"},\"lang\":{\"wt\":\"\\\"text\\\"\"},\"inline\":{\"wt\":\"none\"}},\"i\":0}}]}'><nowiki>foo</nowiki></code></p>"
|
||||
},
|
||||
"Inline attribute (inline code)": {
|
||||
"wt2wt": "Text <source lang=\"javascript\" inline=\"\">var a;</source>.",
|
||||
"selser [1]": "Text <source lang=\"javascript\" inline>var a;</source>.",
|
||||
"selser [[4,0,4]]": "121y1t4<source lang=\"javascript\" inline>var a;</source>1ibg90y",
|
||||
"selser [[0,0,2]]": "Text <source lang=\"javascript\" inline>var a;</source>yeuqlm.",
|
||||
"selser [2]": "brhsc5\n\nText <source lang=\"javascript\" inline>var a;</source>.",
|
||||
"selser [[0,0,4]]": "Text <source lang=\"javascript\" inline>var a;</source>4x62b6",
|
||||
"selser [[2,0,0]]": "1mjitw8Text <source lang=\"javascript\" inline>var a;</source>.",
|
||||
"selser [[0,0,3]]": "Text <source lang=\"javascript\" inline>var a;</source>",
|
||||
"selser [[2,0,4]]": "uuwnfvText <source lang=\"javascript\" inline>var a;</source>qjkvx1",
|
||||
"selser [[3,0,3]]": "<source lang=\"javascript\" inline>var a;</source>",
|
||||
"selser [[4,0,0]]": "uouej0<source lang=\"javascript\" inline>var a;</source>.",
|
||||
"selser [[3,0,0]]": "<source lang=\"javascript\" inline>var a;</source>.",
|
||||
"selser [[4,0,3]]": "1wcoh54<source lang=\"javascript\" inline>var a;</source>",
|
||||
"selser [[2,0,2]]": "11th7ozText <source lang=\"javascript\" inline>var a;</source>1c8ff9m.",
|
||||
"selser [[2,0,3]]": "1cyv6rText <source lang=\"javascript\" inline>var a;</source>",
|
||||
"selser [[4,0,2]]": "g6ytvp<source lang=\"javascript\" inline>var a;</source>10gwvyi.",
|
||||
"selser [[3,0,2]]": "<source lang=\"javascript\" inline>var a;</source>kjtjv6.",
|
||||
"selser [[3,0,4]]": "<source lang=\"javascript\" inline>var a;</source>zs6oqy"
|
||||
}
|
||||
}
|
|
@ -1,4 +1,8 @@
|
|||
!! Version 2
|
||||
!! options
|
||||
parsoid-compatible
|
||||
version=2
|
||||
!! end
|
||||
|
||||
# Force the test runner to ensure the extension is loaded
|
||||
!! hooks
|
||||
source
|
||||
|
@ -10,12 +14,16 @@ Non-existent language
|
|||
<source lang="doesnotexist">
|
||||
foobar
|
||||
</source>
|
||||
!! html
|
||||
!! html/php
|
||||
<div class="mw-highlight mw-content-ltr" dir="ltr"><pre>foobar</pre></div>
|
||||
!! html/parsoid
|
||||
<div class="mw-highlight mw-content-ltr" dir="ltr" about="#mwt1" typeof="mw:Extension/source" data-mw='{"name":"source","attrs":{"lang":"doesnotexist"},"body":{"extsrc":"\nfoobar\n"}}'><pre>foobar</pre></div>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
No language specified
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source>
|
||||
foo
|
||||
|
@ -26,6 +34,8 @@ foo
|
|||
|
||||
!! test
|
||||
No language specified (no wellformed xml)
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! config
|
||||
!! wikitext
|
||||
<source>
|
||||
|
@ -37,6 +47,8 @@ bar
|
|||
|
||||
!! test
|
||||
XSS is escaped
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source lang="doesnotexist">
|
||||
<script>alert("pwnd")</script>
|
||||
|
@ -61,8 +73,10 @@ SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105
|
|||
|
||||
!! test
|
||||
XSS is escaped (inline)
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source lang="doesnotexist" inline>
|
||||
<source lang="doesnotexist" inline="">
|
||||
<script>alert("pwnd")</script>
|
||||
<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>
|
||||
<IMG
|
||||
|
@ -79,6 +93,8 @@ SRC=javascript:al
|
|||
|
||||
!! test
|
||||
Default behaviour (inner is pre)
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source lang="javascript">
|
||||
var a;
|
||||
|
@ -90,11 +106,13 @@ var a;
|
|||
|
||||
!! test
|
||||
Multiline <source/> in lists
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
* <source>a
|
||||
*<source>a
|
||||
b</source>
|
||||
|
||||
* foo <source>a
|
||||
*foo <source>a
|
||||
b</source>
|
||||
!! html
|
||||
<ul><li><div class="mw-highlight mw-content-ltr" dir="ltr"><pre>a
|
||||
|
@ -105,6 +123,8 @@ b</pre></div></li></ul>
|
|||
|
||||
!! test
|
||||
Custom attributes
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source lang="javascript" id="foo" class="bar" dir="rtl" style="font-size: larger;">var a;</source>
|
||||
!! html
|
||||
|
@ -112,17 +132,25 @@ Custom attributes
|
|||
</pre></div>
|
||||
!! end
|
||||
|
||||
# The html/parsoid section verifies that Parsoid can handle attributes
|
||||
# that are not in key=value XML-like syntax.
|
||||
!! test
|
||||
Inline attribute (inline code)
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
Text <source lang="javascript" inline>var a;</source>.
|
||||
!! html
|
||||
!! html/php
|
||||
<p>Text <code class="mw-highlight mw-highlight-lang-javascript mw-content-ltr" dir="ltr"><span class="kd">var</span> <span class="nx">a</span><span class="p">;</span></code>.
|
||||
</p>
|
||||
!! html/parsoid
|
||||
<p>Text <code class="mw-highlight mw-highlight-lang-javascript mw-content-ltr" dir="ltr" about="#mwt1" typeof="mw:Extension/source" data-mw='{"name":"source","attrs":{"lang":"javascript","inline":""},"body":{"extsrc":"var a;"}}'><span class="kd">var</span> <span class="nx">a</span><span class="p">;</span></code>.</p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
Enclose none (inline code)
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
Text <source lang="javascript" enclose="none">var a;</source>.
|
||||
!! html
|
||||
|
@ -134,13 +162,17 @@ Text <source lang="javascript" enclose="none">var a;</source>.
|
|||
Enclose with nowiki
|
||||
!! wikitext
|
||||
{{#tag:syntaxhighlight|<nowiki>foo</nowiki>|lang="text"|inline=none}}
|
||||
!! html
|
||||
!! html/php
|
||||
<p><code class="mw-highlight mw-highlight-lang-text mw-content-ltr" dir="ltr">foo</code>
|
||||
</p>
|
||||
!! html/parsoid
|
||||
<p><code class="mw-highlight mw-highlight-lang-text mw-content-ltr" dir="ltr" about="#mwt1" typeof="mw:Extension/syntaxhighlight mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#tag:syntaxhighlight","function":"tag"},"params":{"1":{"wt":"<nowiki>foo</nowiki>"},"lang":{"wt":"\"text\""},"inline":{"wt":"none"}},"i":0}}]}'>foo</code></p>
|
||||
!! end
|
||||
|
||||
!! test
|
||||
No code
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source lang="CSharp"></source>
|
||||
!! html
|
||||
|
@ -149,6 +181,8 @@ No code
|
|||
|
||||
!! test
|
||||
Just whitespace
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<source lang="CSharp"> </source>
|
||||
!! html
|
||||
|
@ -157,6 +191,8 @@ Just whitespace
|
|||
|
||||
!! test
|
||||
tabs plus tidy (T32930, T59826)
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
!! wikitext
|
||||
<syntaxhighlight lang="javascript" enclose="pre" highlight="2-3">
|
||||
function doSomething() {
|
||||
|
@ -175,19 +211,22 @@ function doSomething() {
|
|||
!! test
|
||||
deprecated source tag adds tracking category
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
cat
|
||||
!! wikitext
|
||||
<source lang="python">print('Hi')</source>
|
||||
!! html
|
||||
!! html/php
|
||||
cat=Pages_using_deprecated_source_tags sort=
|
||||
!! end
|
||||
|
||||
!! test
|
||||
deprecated enclose option adds tracking category
|
||||
!! options
|
||||
parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true }
|
||||
cat
|
||||
!! wikitext
|
||||
<syntaxhighlight enclose="none" lang="python">print('Hi')</syntaxhighlight>
|
||||
!! html
|
||||
!! html/php
|
||||
cat=Pages_using_deprecated_enclose_attributes sort=
|
||||
!! end
|
||||
|
|
Loading…
Reference in a new issue