Port to BoxedCommand

All of the interactions with `pygmentize` have been refactored into a
new class, conveniently called Pygmentize. It is responsible for getting

* pygments version (cached in APCu for 1 hour)
* generated CSS (cached in WAN by version for 1 week)
* lexer list (cached in APCu by version for 1 day)

and actually highlighting stuff! Most code paths differentiate whether
we're using a bundled version of pygments or one that has been
explicitly configured. If using the bundled one, we take shortcuts since
we already know the lexer list, have the CSS generated, etc.

ResourceLoaderPygmentsModule is added to switch between loading
generated CSS from the bundled file or Shellboxing out to get it from
pygments.

Bug: T289227
Change-Id: I2e82e5aa2a71604b87ffb4936204201d06678341
This commit is contained in:
Kunal Mehta 2021-09-02 19:30:32 -07:00
parent c8bd606cab
commit af6654e5f9
7 changed files with 382 additions and 81 deletions

View file

@ -24,6 +24,9 @@
"i18n"
]
},
"AutoloadNamespaces": {
"MediaWiki\\SyntaxHighlight\\": "includes/"
},
"AutoloadClasses": {
"SyntaxHighlight": "includes/SyntaxHighlight.php",
"SyntaxHighlightAce": "includes/SyntaxHighlightAce.php",
@ -36,12 +39,12 @@
},
"ResourceModules": {
"ext.pygments": {
"class": "MediaWiki\\SyntaxHighlight\\ResourceLoaderPygmentsModule",
"targets": [
"desktop",
"mobile"
],
"styles": [
"pygments.generated.css",
"pygments.wrapper.less"
]
},

252
includes/Pygmentize.php Normal file
View file

@ -0,0 +1,252 @@
<?php
/**
* Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
namespace MediaWiki\SyntaxHighlight;
use MediaWiki\MediaWikiServices;
use Shellbox\Command\BoxedCommand;
/**
 * Wrapper around the `pygmentize` command
 *
 * Every shellout goes through boxedCommand() and is counted in statsd via
 * recordShellout(). When no explicit pygmentize path is configured the
 * bundled copy is used, and the version and lexer list are read from files
 * shipped next to it instead of being fetched by running the command.
 */
class Pygmentize {

	/**
	 * If no pygmentize is configured, use bundled
	 *
	 * @return bool True when $wgPygmentizePath is exactly false
	 */
	public static function useBundled(): bool {
		global $wgPygmentizePath;

		return $wgPygmentizePath === false;
	}

	/**
	 * Get a real path to pygmentize
	 *
	 * @return string The bundled executable if nothing is configured,
	 *  otherwise the configured $wgPygmentizePath
	 */
	private static function getPath(): string {
		global $wgPygmentizePath;

		// If $wgPygmentizePath is unset, use the bundled copy.
		if ( self::useBundled() ) {
			return __DIR__ . '/../pygments/pygmentize';
		}

		return $wgPygmentizePath;
	}

	/**
	 * Get the version of pygments
	 *
	 * The result is memoized in a static local for the rest of the request.
	 * For a configured (non-bundled) pygmentize it is additionally kept in
	 * the local server object cache for an hour.
	 *
	 * @return string
	 * @throws PygmentsException If the command fails or its output does not
	 *  contain a recognisable version string
	 */
	public static function getVersion(): string {
		static $version;
		if ( $version !== null ) {
			return $version;
		}

		if ( self::useBundled() ) {
			$version = self::getBundledVersion();
			return $version;
		}

		$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
		$version = $cache->getWithSetCallback(
			$cache->makeGlobalKey( 'pygmentize-version' ),
			$cache::TTL_HOUR,
			static function () {
				$result = self::boxedCommand()
					->params( self::getPath(), '-V' )
					->includeStderr()
					->execute();
				self::recordShellout( 'version' );

				$output = $result->getStdout();
				if ( $result->getExitCode() != 0 ||
					!preg_match( '/^Pygments version (\S+),/', $output, $matches )
				) {
					throw new PygmentsException( $output );
				}

				return $matches[1];
			}
		);

		return $version;
	}

	/**
	 * Get the version of bundled pygments
	 *
	 * @return string Trimmed contents of the bundled VERSION file
	 */
	private static function getBundledVersion(): string {
		return trim( file_get_contents( __DIR__ . '/../pygments/VERSION' ) );
	}

	/**
	 * Get the pygments generated CSS (cached)
	 *
	 * The cache key includes the pygments version and the entry is kept in
	 * the main WAN object cache for a week.
	 *
	 * Note: if using bundled, the CSS is already available
	 * in modules/pygments.generated.css.
	 *
	 * @return string
	 * @throws PygmentsException If the CSS has to be generated and that fails
	 */
	public static function getGeneratedCSS(): string {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

		return $cache->getWithSetCallback(
			$cache->makeGlobalKey( 'pygmentize-css', self::getVersion() ),
			$cache::TTL_WEEK,
			[ __CLASS__, 'fetchGeneratedCSS' ]
		);
	}

	/**
	 * Shell out to get generated CSS from pygments
	 *
	 * @internal Only public for updateCSS.php
	 * @return string CSS for the "default" style, scoped to .mw-highlight
	 * @throws PygmentsException If pygmentize exits with a non-zero code
	 */
	public static function fetchGeneratedCSS(): string {
		$result = self::boxedCommand()
			->params(
				self::getPath(), '-f', 'html',
				'-S', 'default', '-a', '.mw-highlight' )
			->includeStderr()
			->execute();
		self::recordShellout( 'generated_css' );

		$output = $result->getStdout();
		if ( $result->getExitCode() != 0 ) {
			throw new PygmentsException( $output );
		}

		return $output;
	}

	/**
	 * Get the list of supported lexers by pygments (cached)
	 *
	 * With the bundled pygments the list comes from the pre-generated
	 * SyntaxHighlight.lexers.php file. Otherwise it is fetched from the
	 * command and kept in the local server object cache for a day, keyed
	 * by the pygments version.
	 *
	 * @return array Map of lexer name => true
	 * @throws PygmentsException If the list has to be fetched and that fails
	 */
	public static function getLexers(): array {
		if ( self::useBundled() ) {
			return require __DIR__ . '/../SyntaxHighlight.lexers.php';
		}

		$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();

		return $cache->getWithSetCallback(
			$cache->makeGlobalKey( 'pygmentize-lexers', self::getVersion() ),
			$cache::TTL_DAY,
			[ __CLASS__, 'fetchLexers' ]
		);
	}

	/**
	 * Shell out to get supported lexers by pygments
	 *
	 * @internal Only public for updateLexerList.php
	 * @return array Map of lexer name => true, sorted by name
	 * @throws PygmentsException If pygmentize exits with a non-zero code
	 */
	public static function fetchLexers(): array {
		$result = self::boxedCommand()
			->params( self::getPath(), '-L', 'lexer' )
			->includeStderr()
			->execute();
		self::recordShellout( 'fetch_lexers' );

		$output = $result->getStdout();
		if ( $result->getExitCode() != 0 ) {
			throw new PygmentsException( $output );
		}

		return self::parseLexerList( (string)$output );
	}

	/**
	 * Turn the human-readable output of `pygmentize -L lexer` into a name set
	 *
	 * Ideally pygments would output this in a machine-readable format
	 * (https://github.com/pygments/pygments/issues/1437)
	 *
	 * @param string $output Raw command output
	 * @return array Map of lexer name => true, sorted by name
	 */
	private static function parseLexerList( string $output ): array {
		$lexers = [];
		foreach ( explode( "\n", $output ) as $line ) {
			// Only lines starting with "*" carry lexer names; they look
			// like "* name, alias, alias:".
			if ( substr( $line, 0, 1 ) !== '*' ) {
				continue;
			}

			$newLexers = explode( ', ', trim( $line, "* :\n" ) );
			// Skip internal, unnamed lexers
			if ( $newLexers[0] !== '' ) {
				$lexers = array_merge( $lexers, $newLexers );
			}
		}

		$lexers = array_unique( $lexers );
		sort( $lexers );

		return array_fill_keys( $lexers, true );
	}

	/**
	 * Actually highlight some text
	 *
	 * @param string $lexer Lexer name
	 * @param string $code Code to highlight
	 * @param array $options Options to pass to pygments
	 * @return string Highlighted HTML as printed by pygmentize
	 * @throws PygmentsException If pygmentize exits with a non-zero code;
	 *  the message is its stderr, or its stdout when stderr is empty
	 */
	public static function highlight( $lexer, $code, array $options ): string {
		$optionPairs = [];
		foreach ( $options as $k => $v ) {
			$optionPairs[] = "{$k}={$v}";
		}

		$result = self::boxedCommand()
			->params(
				self::getPath(),
				'-l', $lexer,
				'-f', 'html',
				'-O', implode( ',', $optionPairs ),
				'file'
			)
			->inputFileFromString( 'file', $code )
			->execute();
		self::recordShellout( 'highlight' );

		$output = $result->getStdout();
		if ( $result->getExitCode() != 0 ) {
			// stderr is not merged into stdout for this command, so the
			// diagnostic text has to be read from stderr explicitly.
			$error = $result->getStderr();
			if ( $error === null || $error === '' ) {
				$error = (string)$output;
			}
			throw new PygmentsException( $error );
		}

		return $output;
	}

	/**
	 * Build the sandboxed command every pygmentize invocation starts from
	 *
	 * @return BoxedCommand Command with networking disabled, the default
	 *  firejail seccomp filter requested and the route name set
	 */
	private static function boxedCommand(): BoxedCommand {
		return MediaWikiServices::getInstance()->getShellCommandFactory()
			->createBoxed( 'syntaxhighlight' )
			->disableNetwork()
			->firejailDefaultSeccomp()
			->routeName( 'syntaxhighlight-pygments' );
	}

	/**
	 * Track how often we do each type of shellout in statsd
	 *
	 * @param string $type Type of shellout
	 */
	private static function recordShellout( $type ) {
		$statsd = MediaWikiServices::getInstance()->getStatsdDataFactory();
		$statsd->increment( "syntaxhighlight_shell.$type" );
	}
}

View file

@ -0,0 +1,28 @@
<?php
/**
* Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
namespace MediaWiki\SyntaxHighlight;
/**
 * Exception thrown by Pygmentize when a `pygmentize` invocation fails.
 * The message carries the output captured from the command (stdout
 * and/or stderr, depending on the call site).
 */
class PygmentsException extends \Exception {
}

View file

@ -0,0 +1,75 @@
<?php
/**
* Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
namespace MediaWiki\SyntaxHighlight;
use ResourceLoaderContext;
use ResourceLoaderFileModule;
/**
 * File module that serves the pygments stylesheet either from the bundled
 * file or from CSS generated at runtime, decided once at construction.
 */
class ResourceLoaderPygmentsModule extends ResourceLoaderFileModule {

	/** @var bool Snapshot of Pygmentize::useBundled() taken in the constructor */
	private $useBundled;

	/** @inheritDoc */
	public function __construct(
		array $options = [],
		$localBasePath = null,
		$remoteBasePath = null
	) {
		$bundled = Pygmentize::useBundled();
		$this->useBundled = $bundled;

		// With the bundled pygments the pre-generated stylesheet is simply
		// one more style file of this module.
		if ( $bundled ) {
			$options['styles'][] = 'pygments.generated.css';
		}

		parent::__construct( $options, $localBasePath, $remoteBasePath );
	}

	/**
	 * URL loading is turned off because part of the styles may be
	 * generated rather than read from a file
	 *
	 * @return bool Always false
	 */
	public function supportsURLLoading() {
		return false;
	}

	/** @inheritDoc */
	public function getStyles( ResourceLoaderContext $context ) {
		$css = parent::getStyles( $context );

		if ( $this->useBundled ) {
			// Already covered by the style file added in the constructor.
			return $css;
		}

		// Append the generated rules to whatever the file styles produced
		// for the "all" media type.
		$fileStyles = $css['all'] ?? '';
		$css['all'] = $fileStyles . Pygmentize::getGeneratedCSS();

		return $css;
	}

	/** @inheritDoc */
	public function getDefinitionSummary( ResourceLoaderContext $context ) {
		$definition = parent::getDefinitionSummary( $context );

		if ( $this->useBundled ) {
			return $definition;
		}

		// Make the summary depend on the pygments version in use.
		$definition[] = Pygmentize::getVersion();

		return $definition;
	}
}

View file

@ -17,7 +17,8 @@
*/
use MediaWiki\MediaWikiServices;
use MediaWiki\Shell\Shell;
use MediaWiki\SyntaxHighlight\Pygmentize;
use MediaWiki\SyntaxHighlight\PygmentsException;
class SyntaxHighlight {
@ -54,7 +55,7 @@ class SyntaxHighlight {
}
if ( !$lexers ) {
$lexers = require __DIR__ . '/../SyntaxHighlight.lexers.php';
$lexers = Pygmentize::getLexers();
}
$lexer = strtolower( $lang );
@ -160,20 +161,6 @@ class SyntaxHighlight {
return $out;
}
/**
* @return string
*/
public static function getPygmentizePath() {
global $wgPygmentizePath;
// If $wgPygmentizePath is unset, use the bundled copy.
if ( $wgPygmentizePath === false ) {
$wgPygmentizePath = __DIR__ . '/../pygments/pygmentize';
}
return $wgPygmentizePath;
}
/**
* @param string $code
* @param bool $isInline
@ -275,28 +262,14 @@ class SyntaxHighlight {
$output = $cache->getWithSetCallback(
$cache->makeGlobalKey( 'highlight', self::makeCacheKeyHash( $code, $lexer, $options ) ),
$cache::TTL_MONTH,
function ( $oldValue, &$ttl ) use ( $code, $lexer, $options, &$error ) {
$optionPairs = [];
foreach ( $options as $k => $v ) {
$optionPairs[] = "{$k}={$v}";
}
$result = Shell::command(
self::getPygmentizePath(),
'-l', $lexer,
'-f', 'html',
'-O', implode( ',', $optionPairs )
)
->input( $code )
->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
->execute();
if ( $result->getExitCode() != 0 ) {
static function ( $oldValue, &$ttl ) use ( $code, $lexer, $options, &$error ) {
try {
return Pygmentize::highlight( $lexer, $code, $options );
} catch ( PygmentsException $e ) {
$ttl = WANObjectCache::TTL_UNCACHEABLE;
$error = $result->getStderr();
$error = $e->getMessage();
return null;
}
return $result->getStdout();
}
);

View file

@ -22,7 +22,8 @@
* @ingroup Maintenance
*/
use MediaWiki\Shell\Shell;
use MediaWiki\SyntaxHighlight\Pygmentize;
use MediaWiki\SyntaxHighlight\PygmentsException;
$IP = getenv( 'MW_INSTALL_PATH' ) ?: __DIR__ . '/../../..';
@ -34,27 +35,20 @@ class UpdateCSS extends Maintenance {
parent::__construct();
$this->requireExtension( 'SyntaxHighlight' );
$this->addDescription( 'Generate CSS code for SyntaxHighlight_GeSHi' );
$this->addDescription( 'Generate bundled CSS for SyntaxHighlight' );
}
public function execute() {
$target = __DIR__ . '/../modules/pygments.generated.css';
$css = "/* Stylesheet generated by updateCSS.php */\n";
$result = Shell::command(
SyntaxHighlight::getPygmentizePath(),
'-f', 'html',
'-S', 'default',
'-a', '.' . SyntaxHighlight::HIGHLIGHT_CSS_CLASS
)
->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
->execute();
if ( $result->getExitCode() != 0 ) {
$this->fatalError( 'Non-zero exit code: ' . $result->getStderr() );
try {
$output = Pygmentize::fetchGeneratedCSS();
} catch ( PygmentsException $e ) {
$this->fatalError( "Error when generating CSS:\n" . $e->getMessage() );
}
$css .= $result->getStdout();
$css .= $output;
if ( file_put_contents( $target, $css ) === false ) {
$this->output( "Failed to write to {$target}\n" );

View file

@ -22,7 +22,8 @@
* @ingroup Maintenance
*/
use MediaWiki\Shell\Shell;
use MediaWiki\SyntaxHighlight\Pygmentize;
use MediaWiki\SyntaxHighlight\PygmentsException;
use Wikimedia\StaticArrayWriter;
$IP = getenv( 'MW_INSTALL_PATH' ) ?: __DIR__ . '/../../..';
@ -34,41 +35,16 @@ class UpdateLexerList extends Maintenance {
parent::__construct();
$this->requireExtension( 'SyntaxHighlight' );
$this->addDescription( 'Update list of lexers supported by SyntaxHighlight_GeSHi' );
$this->addDescription( 'Update list of lexers supported by SyntaxHighlight' );
}
public function execute() {
$header = 'Generated by ' . basename( __FILE__ );
$lexers = [];
$result = Shell::command(
SyntaxHighlight::getPygmentizePath(),
'-L', 'lexer'
)
->restrict( Shell::RESTRICT_DEFAULT | Shell::NO_NETWORK )
->execute();
if ( $result->getExitCode() != 0 ) {
$this->fatalError( 'Non-zero exit code: ' . $result->getStderr() );
}
$output = $result->getStdout();
foreach ( explode( "\n", $output ) as $line ) {
if ( substr( $line, 0, 1 ) === '*' ) {
$newLexers = explode( ', ', trim( $line, "* :\n" ) );
// Skip internal, unnamed lexers
if ( $newLexers[0] !== '' ) {
$lexers = array_merge( $lexers, $newLexers );
}
}
}
$lexers = array_unique( $lexers );
sort( $lexers );
$data = [];
foreach ( $lexers as $lexer ) {
$data[$lexer] = true;
try {
$data = Pygmentize::fetchLexers();
} catch ( PygmentsException $e ) {
$this->fatalError( "Error when getting lexers:\n" . $e->getMessage() );
}
$writer = new StaticArrayWriter();