mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/SyntaxHighlight_GeSHi
synced 2024-11-24 06:24:29 +00:00
2d3af74c39
We already add the dir=ltr/rtl HTML attribute so this should be a no-op and makes it consistent with block snippets. Change-Id: I53e9204cc3bd54ba167f6f91e718a9d35b5bdfd0
594 lines
18 KiB
PHP
594 lines
18 KiB
PHP
<?php
|
|
/**
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*/
|
|
|
|
use MediaWiki\MediaWikiServices;
|
|
use MediaWiki\Shell\Shell;
|
|
|
|
class SyntaxHighlight {
|
|
|
|
/** @var int The maximum number of lines that may be selected for highlighting. */
|
|
private const HIGHLIGHT_MAX_LINES = 1000;
|
|
|
|
/** @var int Maximum input size for the highlighter (100 kB). */
|
|
private const HIGHLIGHT_MAX_BYTES = 102400;
|
|
|
|
/** @var string CSS class for syntax-highlighted code. Public as used by the updateCSS maintenance script. */
|
|
public const HIGHLIGHT_CSS_CLASS = 'mw-highlight';
|
|
|
|
/** @var int Cache version. Increment whenever the HTML changes. */
|
|
private const CACHE_VERSION = 2;
|
|
|
|
/** @var array Mapping of MIME-types to lexer names. */
|
|
private static $mimeLexers = [
|
|
'text/javascript' => 'javascript',
|
|
'application/json' => 'javascript',
|
|
'text/xml' => 'xml',
|
|
];
|
|
|
|
/**
|
|
* Get the Pygments lexer name for a particular language.
|
|
*
|
|
* @param string $lang Language name.
|
|
* @return string|null Lexer name, or null if no matching lexer.
|
|
*/
|
|
private static function getLexer( $lang ) {
|
|
static $lexers = null;
|
|
|
|
if ( $lang === null ) {
|
|
return null;
|
|
}
|
|
|
|
if ( !$lexers ) {
|
|
$lexers = require __DIR__ . '/../SyntaxHighlight.lexers.php';
|
|
}
|
|
|
|
$lexer = strtolower( $lang );
|
|
|
|
if ( isset( $lexers[$lexer] ) ) {
|
|
return $lexer;
|
|
}
|
|
|
|
$geshi2pygments = SyntaxHighlightGeSHiCompat::getGeSHiToPygmentsMap();
|
|
|
|
// Check if this is a GeSHi lexer name for which there exists
|
|
// a compatible Pygments lexer with a different name.
|
|
if ( isset( $geshi2pygments[$lexer] ) ) {
|
|
$lexer = $geshi2pygments[$lexer];
|
|
if ( in_array( $lexer, $lexers ) ) {
|
|
return $lexer;
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Register parser hook
|
|
*
|
|
* @param Parser $parser
|
|
*/
|
|
public static function onParserFirstCallInit( Parser $parser ) {
|
|
$parser->setHook( 'source', [ 'SyntaxHighlight', 'parserHookSource' ] );
|
|
$parser->setHook( 'syntaxhighlight', [ 'SyntaxHighlight', 'parserHook' ] );
|
|
}
|
|
|
|
/**
|
|
* Parser hook for <source> to add deprecated tracking category
|
|
*
|
|
* @param string $text
|
|
* @param array $args
|
|
* @param Parser $parser
|
|
* @return string
|
|
* @throws MWException
|
|
*/
|
|
public static function parserHookSource( $text, $args, $parser ) {
|
|
$parser->addTrackingCategory( 'syntaxhighlight-source-category' );
|
|
return self::parserHook( $text, $args, $parser );
|
|
}
|
|
|
|
/**
|
|
* Parser hook for both <source> and <syntaxhighlight> logic
|
|
*
|
|
* @param string $text
|
|
* @param array $args
|
|
* @param Parser $parser
|
|
* @return string
|
|
* @throws MWException
|
|
*/
|
|
public static function parserHook( $text, $args, $parser ) {
|
|
// Replace strip markers (For e.g. {{#tag:syntaxhighlight|<nowiki>...}})
|
|
$out = $parser->mStripState->unstripNoWiki( $text );
|
|
|
|
// Don't trim leading spaces away, just the linefeeds
|
|
$out = preg_replace( '/^\n+/', '', rtrim( $out ) );
|
|
|
|
// Convert deprecated attributes
|
|
if ( isset( $args['enclose'] ) ) {
|
|
if ( $args['enclose'] === 'none' ) {
|
|
$args['inline'] = true;
|
|
}
|
|
unset( $args['enclose'] );
|
|
$parser->addTrackingCategory( 'syntaxhighlight-enclose-category' );
|
|
}
|
|
|
|
$lexer = $args['lang'] ?? '';
|
|
|
|
$result = self::highlight( $out, $lexer, $args, $parser );
|
|
if ( !$result->isGood() ) {
|
|
$parser->addTrackingCategory( 'syntaxhighlight-error-category' );
|
|
}
|
|
$out = $result->getValue();
|
|
|
|
// Register CSS
|
|
// TODO: Consider moving to a separate method so that public method
|
|
// highlight() can be used without needing to know the module name.
|
|
$parser->getOutput()->addModuleStyles( 'ext.pygments' );
|
|
|
|
return $out;
|
|
}
|
|
|
|
/**
|
|
* Unwrap the <div> wrapper of the Pygments output
|
|
*
|
|
* @param string $out Output
|
|
* @return string Unwrapped output
|
|
*/
|
|
private static function unwrap( string $out ) : string {
|
|
if ( $out !== '' ) {
|
|
$m = [];
|
|
if ( preg_match( '/^<div class="?mw-highlight"?>(.*)<\/div>$/s', trim( $out ), $m ) ) {
|
|
$out = trim( $m[1] );
|
|
} else {
|
|
throw new MWException( 'Unexpected output from Pygments encountered' );
|
|
}
|
|
}
|
|
return $out;
|
|
}
|
|
|
|
/**
|
|
* @return string
|
|
*/
|
|
public static function getPygmentizePath() {
|
|
global $wgPygmentizePath;
|
|
|
|
// If $wgPygmentizePath is unset, use the bundled copy.
|
|
if ( $wgPygmentizePath === false ) {
|
|
$wgPygmentizePath = __DIR__ . '/../pygments/pygmentize';
|
|
}
|
|
|
|
return $wgPygmentizePath;
|
|
}
|
|
|
|
/**
|
|
* @param string $code
|
|
* @param bool $isInline
|
|
* @return string HTML
|
|
*/
|
|
private static function plainCodeWrap( $code, $isInline ) {
|
|
if ( $isInline ) {
|
|
return htmlspecialchars( $code, ENT_NOQUOTES );
|
|
}
|
|
|
|
return Html::rawElement(
|
|
'div',
|
|
[ 'class' => self::HIGHLIGHT_CSS_CLASS ],
|
|
Html::element( 'pre', [], $code )
|
|
);
|
|
}
|
|
|
|
/**
|
|
* @param string $code
|
|
* @param string|null $lang
|
|
* @param array $args
|
|
* @return Status
|
|
*/
|
|
private static function highlightInner( $code, $lang = null, $args = [] ) {
|
|
$status = new Status;
|
|
|
|
$lexer = self::getLexer( $lang );
|
|
if ( $lexer === null && $lang !== null ) {
|
|
$status->warning( 'syntaxhighlight-error-unknown-language', $lang );
|
|
}
|
|
|
|
// For empty tag, output nothing instead of empty <pre>.
|
|
if ( $code === '' ) {
|
|
$status->value = '';
|
|
return $status;
|
|
}
|
|
|
|
$length = strlen( $code );
|
|
if ( strlen( $code ) > self::HIGHLIGHT_MAX_BYTES ) {
|
|
// Disable syntax highlighting
|
|
$lexer = null;
|
|
$status->warning(
|
|
'syntaxhighlight-error-exceeds-size-limit',
|
|
$length,
|
|
self::HIGHLIGHT_MAX_BYTES
|
|
);
|
|
} elseif ( Shell::isDisabled() ) {
|
|
// Disable syntax highlighting
|
|
$lexer = null;
|
|
$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
|
|
wfWarn(
|
|
'MediaWiki determined that it cannot invoke Pygments. ' .
|
|
'As a result, SyntaxHighlight_GeSHi will not perform any syntax highlighting. ' .
|
|
'See the debug log for details: ' .
|
|
'https://www.mediawiki.org/wiki/Manual:$wgDebugLogFile'
|
|
);
|
|
}
|
|
|
|
$isInline = isset( $args['inline'] );
|
|
$showLines = isset( $args['line'] );
|
|
|
|
if ( $isInline ) {
|
|
$code = trim( $code );
|
|
}
|
|
|
|
if ( $lexer === null ) {
|
|
// When syntax highlighting is disabled..
|
|
$status->value = self::plainCodeWrap( $code, $isInline );
|
|
return $status;
|
|
}
|
|
|
|
$options = [
|
|
'cssclass' => self::HIGHLIGHT_CSS_CLASS,
|
|
'encoding' => 'utf-8',
|
|
];
|
|
|
|
// Line numbers
|
|
if ( $showLines ) {
|
|
$options['linenos'] = 'inline';
|
|
}
|
|
|
|
if ( $lexer === 'php' && strpos( $code, '<?php' ) === false ) {
|
|
$options['startinline'] = 1;
|
|
}
|
|
|
|
// Highlight specific lines
|
|
if ( isset( $args['highlight'] ) ) {
|
|
$lines = self::parseHighlightLines( $args['highlight'] );
|
|
if ( count( $lines ) ) {
|
|
$options['hl_lines'] = implode( ' ', $lines );
|
|
}
|
|
}
|
|
|
|
// Starting line number
|
|
if ( isset( $args['start'] ) && ctype_digit( $args['start'] ) ) {
|
|
$options['linenostart'] = (int)$args['start'];
|
|
}
|
|
|
|
if ( !empty( $args['linelinks'] ) && ctype_alpha( $args['linelinks'] ) ) {
|
|
$options['linespans'] = $args['linelinks'];
|
|
}
|
|
|
|
if ( $isInline ) {
|
|
$options['nowrap'] = 1;
|
|
}
|
|
|
|
$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
|
|
$error = null;
|
|
$output = $cache->getWithSetCallback(
|
|
$cache->makeGlobalKey( 'highlight', self::makeCacheKeyHash( $code, $lexer, $options ) ),
|
|
$cache::TTL_MONTH,
|
|
function ( $oldValue, &$ttl ) use ( $code, $lexer, $options, &$error ) {
|
|
$optionPairs = [];
|
|
foreach ( $options as $k => $v ) {
|
|
$optionPairs[] = "{$k}={$v}";
|
|
}
|
|
$result = Shell::command(
|
|
self::getPygmentizePath(),
|
|
'-l', $lexer,
|
|
'-f', 'html',
|
|
'-O', implode( ',', $optionPairs )
|
|
)
|
|
->input( $code )
|
|
->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
|
|
->execute();
|
|
|
|
if ( $result->getExitCode() != 0 ) {
|
|
$ttl = WANObjectCache::TTL_UNCACHEABLE;
|
|
$error = $result->getStderr();
|
|
return null;
|
|
}
|
|
|
|
return $result->getStdout();
|
|
}
|
|
);
|
|
|
|
if ( $error !== null || $output === null ) {
|
|
$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
|
|
if ( $error !== null ) {
|
|
wfWarn( 'Failed to invoke Pygments: ' . $error );
|
|
} else {
|
|
wfWarn( 'Invoking Pygments returned blank output with no error response' );
|
|
}
|
|
|
|
// Fall back to preformatted code without syntax highlighting
|
|
$output = self::plainCodeWrap( $code, $isInline );
|
|
}
|
|
|
|
$status->value = $output;
|
|
|
|
return $status;
|
|
}
|
|
|
|
/**
|
|
* Highlight a code-block using a particular lexer.
|
|
*
|
|
* This produces raw HTML (wrapped by Status), the caller is responsible
|
|
* for making sure the "ext.pygments" module is loaded in the output.
|
|
*
|
|
* @param string $code Code to highlight.
|
|
* @param string|null $lang Language name, or null to use plain markup.
|
|
* @param array $args Associative array of additional arguments.
|
|
* If it contains a 'line' key, the output will include line numbers.
|
|
* If it includes a 'highlight' key, the value will be parsed as a
|
|
* comma-separated list of lines and line-ranges to highlight.
|
|
* If it contains a 'start' key, the value will be used as the line at which to
|
|
* start highlighting.
|
|
* If it contains a 'inline' key, the output will not be wrapped in `<div><pre/></div>`.
|
|
* If it contains a 'linelinks' key, lines will have links and anchors with a prefix
|
|
* of the value. Similar to the lineanchors+linespans features in Pygments.
|
|
* @param Parser|null $parser Parser, if generating content to be parsed.
|
|
* @return Status Status object, with HTML representing the highlighted
|
|
* code as its value.
|
|
*/
|
|
public static function highlight( $code, $lang = null, $args = [], ?Parser $parser = null ) {
|
|
$status = self::highlightInner( $code, $lang, $args );
|
|
$output = $status->getValue();
|
|
|
|
$isInline = isset( $args['inline'] );
|
|
$showLines = isset( $args['line'] );
|
|
$lexer = self::getLexer( $lang );
|
|
|
|
// Post-Pygment HTML transformations.
|
|
|
|
if ( $showLines ) {
|
|
$lineReplace = Html::element( 'span', [ 'class' => 'linenos', 'data-line' => '$1' ] );
|
|
if ( !empty( $args['linelinks'] ) ) {
|
|
$lineReplace = Html::rawElement(
|
|
'a',
|
|
[ 'href' => '#' . $args['linelinks'] . '-$1' ],
|
|
$lineReplace
|
|
);
|
|
}
|
|
// Convert line numbers to data attributes so they
|
|
// can be displayed as CSS generated content and be
|
|
// unselectable in all browsers.
|
|
$output = preg_replace(
|
|
'`<span class="linenos">\s*([^<]*)\s*</span>`',
|
|
$lineReplace,
|
|
$output
|
|
);
|
|
}
|
|
|
|
// Allow certain HTML attributes
|
|
$htmlAttribs = Sanitizer::validateAttributes(
|
|
$args, array_flip( [ 'style', 'class', 'id' ] )
|
|
);
|
|
|
|
$dir = ( isset( $args['dir'] ) && $args['dir'] === 'rtl' ) ? 'rtl' : 'ltr';
|
|
|
|
// Build class list
|
|
$classList = [];
|
|
if ( isset( $htmlAttribs['class'] ) ) {
|
|
$classList[] = $htmlAttribs['class'];
|
|
}
|
|
$classList[] = self::HIGHLIGHT_CSS_CLASS;
|
|
if ( $lexer !== null ) {
|
|
$classList[] = self::HIGHLIGHT_CSS_CLASS . '-lang-' . $lexer;
|
|
}
|
|
$classList[] = 'mw-content-' . $dir;
|
|
if ( $showLines ) {
|
|
$classList[] = self::HIGHLIGHT_CSS_CLASS . '-lines';
|
|
}
|
|
$htmlAttribs['class'] = implode( ' ', $classList );
|
|
$htmlAttribs['dir'] = $dir;
|
|
'@phan-var array{class:string,dir:string} $htmlAttribs';
|
|
|
|
if ( $isInline ) {
|
|
// We've already trimmed the input $code before highlighting,
|
|
// but pygment's standard out adds a line break afterwards,
|
|
// which would then be preserved in the paragraph that wraps this,
|
|
// and become visible as a space. Avoid that.
|
|
$output = trim( $output );
|
|
|
|
// Enforce inlineness. Stray newlines may result in unexpected list and paragraph processing
|
|
// (also known as doBlockLevels()).
|
|
$output = str_replace( "\n", ' ', $output );
|
|
$output = Html::rawElement( 'code', $htmlAttribs, $output );
|
|
} else {
|
|
$output = self::unwrap( $output );
|
|
|
|
if ( $parser ) {
|
|
// Use 'nowiki' strip marker to prevent list processing (also known as doBlockLevels()).
|
|
// However, leave the wrapping <div/> outside to prevent <p/>-wrapping.
|
|
$marker = $parser::MARKER_PREFIX . '-syntaxhighlightinner-' .
|
|
sprintf( '%08X', $parser->mMarkerIndex++ ) . $parser::MARKER_SUFFIX;
|
|
$parser->mStripState->addNoWiki( $marker, $output );
|
|
$output = $marker;
|
|
}
|
|
|
|
$output = Html::openElement( 'div', $htmlAttribs ) .
|
|
$output .
|
|
Html::closeElement( 'div' );
|
|
}
|
|
|
|
$status->value = $output;
|
|
return $status;
|
|
}
|
|
|
|
/**
|
|
* Construct a cache key for the results of a Pygments invocation.
|
|
*
|
|
* @param string $code Code to be highlighted.
|
|
* @param string $lexer Lexer name.
|
|
* @param array $options Options array.
|
|
* @return string Cache key.
|
|
*/
|
|
private static function makeCacheKeyHash( $code, $lexer, $options ) {
|
|
$optionString = FormatJson::encode( $options, false, FormatJson::ALL_OK );
|
|
return md5( "{$code}|{$lexer}|{$optionString}|" . self::CACHE_VERSION );
|
|
}
|
|
|
|
/**
|
|
* Take an input specifying a list of lines to highlight, returning
|
|
* a raw list of matching line numbers.
|
|
*
|
|
* Input is comma-separated list of lines or line ranges.
|
|
*
|
|
* @param string $lineSpec
|
|
* @return int[] Line numbers.
|
|
*/
|
|
protected static function parseHighlightLines( $lineSpec ) {
|
|
$lines = [];
|
|
$values = array_map( 'trim', explode( ',', $lineSpec ) );
|
|
foreach ( $values as $value ) {
|
|
if ( ctype_digit( $value ) ) {
|
|
$lines[] = (int)$value;
|
|
} elseif ( strpos( $value, '-' ) !== false ) {
|
|
list( $start, $end ) = array_map( 'intval', explode( '-', $value ) );
|
|
if ( self::validHighlightRange( $start, $end ) ) {
|
|
for ( $i = $start; $i <= $end; $i++ ) {
|
|
$lines[] = $i;
|
|
}
|
|
}
|
|
}
|
|
if ( count( $lines ) > self::HIGHLIGHT_MAX_LINES ) {
|
|
$lines = array_slice( $lines, 0, self::HIGHLIGHT_MAX_LINES );
|
|
break;
|
|
}
|
|
}
|
|
return $lines;
|
|
}
|
|
|
|
/**
|
|
* Validate a provided input range
|
|
* @param int $start
|
|
* @param int $end
|
|
* @return bool
|
|
*/
|
|
protected static function validHighlightRange( $start, $end ) {
|
|
// Since we're taking this tiny range and producing a an
|
|
// array of every integer between them, it would be trivial
|
|
// to DoS the system by asking for a huge range.
|
|
// Impose an arbitrary limit on the number of lines in a
|
|
// given range to reduce the impact.
|
|
return $start > 0 &&
|
|
$start < $end &&
|
|
$end - $start < self::HIGHLIGHT_MAX_LINES;
|
|
}
|
|
|
|
/**
|
|
* Hook into Content::getParserOutput to provide syntax highlighting for
|
|
* script content.
|
|
*
|
|
* @param Content $content
|
|
* @param Title $title
|
|
* @param int $revId
|
|
* @param ParserOptions $options
|
|
* @param bool $generateHtml
|
|
* @param ParserOutput &$output
|
|
* @return bool
|
|
* @since MW 1.21
|
|
*/
|
|
public static function onContentGetParserOutput( Content $content, Title $title,
|
|
$revId, ParserOptions $options, $generateHtml, ParserOutput &$output
|
|
) {
|
|
global $wgTextModelsToParse;
|
|
|
|
if ( !$generateHtml ) {
|
|
// Nothing special for us to do, let MediaWiki handle this.
|
|
return true;
|
|
}
|
|
|
|
// Determine the language
|
|
$extension = ExtensionRegistry::getInstance();
|
|
$models = $extension->getAttribute( 'SyntaxHighlightModels' );
|
|
$model = $content->getModel();
|
|
if ( !isset( $models[$model] ) ) {
|
|
// We don't care about this model, carry on.
|
|
return true;
|
|
}
|
|
$lexer = $models[$model];
|
|
|
|
// Hope that $wgSyntaxHighlightModels does not contain silly types.
|
|
$text = ContentHandler::getContentText( $content );
|
|
if ( !$text ) {
|
|
// Oops! Non-text content? Let MediaWiki handle this.
|
|
return true;
|
|
}
|
|
|
|
// Parse using the standard parser to get links etc. into the database, HTML is replaced below.
|
|
// We could do this using $content->fillParserOutput(), but alas it is 'protected'.
|
|
if ( $content instanceof TextContent && in_array( $model, $wgTextModelsToParse ) ) {
|
|
$output = MediaWikiServices::getInstance()->getParser()
|
|
->parse( $text, $title, $options, true, true, $revId );
|
|
}
|
|
|
|
$status = self::highlight( $text, $lexer, [ 'line' => true, 'linelinks' => 'L' ] );
|
|
if ( !$status->isOK() ) {
|
|
return true;
|
|
}
|
|
$out = $status->getValue();
|
|
|
|
$output->addModuleStyles( 'ext.pygments' );
|
|
$output->addModules( 'ext.pygments.linenumbers' );
|
|
$output->setText( $out );
|
|
|
|
// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Hook to provide syntax highlighting for API pretty-printed output
|
|
*
|
|
* @param IContextSource $context
|
|
* @param string $text
|
|
* @param string $mime
|
|
* @param string $format
|
|
* @since MW 1.24
|
|
* @return bool
|
|
*/
|
|
public static function onApiFormatHighlight( IContextSource $context, $text, $mime, $format ) {
|
|
if ( !isset( self::$mimeLexers[$mime] ) ) {
|
|
return true;
|
|
}
|
|
|
|
$lexer = self::$mimeLexers[$mime];
|
|
$status = self::highlight( $text, $lexer );
|
|
if ( !$status->isOK() ) {
|
|
return true;
|
|
}
|
|
|
|
$out = $status->getValue();
|
|
if ( preg_match( '/^<pre([^>]*)>/i', $out, $m ) ) {
|
|
$attrs = Sanitizer::decodeTagAttributes( $m[1] );
|
|
$attrs['class'] .= ' api-pretty-content';
|
|
$encodedAttrs = Sanitizer::safeEncodeTagAttributes( $attrs );
|
|
$out = '<pre' . $encodedAttrs . '>' . substr( $out, strlen( $m[0] ) );
|
|
}
|
|
$output = $context->getOutput();
|
|
$output->addModuleStyles( 'ext.pygments' );
|
|
$output->addHTML( '<div dir="ltr">' . $out . '</div>' );
|
|
|
|
// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
|
|
return false;
|
|
}
|
|
}
|