Validate TeX input for all renderers, not just texvc

The user input specified in the math tag a. la
<math>E=m <script>alert('attacked')</script>^2 </math>
is verified in PNG rendering mode, but not in plaintext, MathJax
or LaTeXML rendering mode. This is a potential security issue.

Furthermore, the texvc specific commands such as $\reals$
that is expanded to $\mathbb{R}$ might be rendered differently
depended on the rendering mode.

Therefore, the security checking and rewriting portion of texvc
have been extracted from the texvc source
(see I1650e6ec2ccefff6335fbc36bbe8ca8f59db0faa) and are
now available as a separate executable (texvccheck).

This commit will now enable this enhancement in security and
provide even more compatibility among the different rendering
modes.

Bug: 49169
Change-Id: Ida24b6bf339508753bed40d2e218c4a5b7fe7d0c
This commit is contained in:
physikerwelt 2014-01-03 14:29:03 +00:00 committed by Physikerwelt
parent ca14ffab73
commit 6a0af8f3b4
8 changed files with 369 additions and 21 deletions

View file

@ -42,7 +42,7 @@ class MathHooks {
* @return string
*/
static function mathTagHook( $content, $attributes, $parser ) {
global $wgContLang, $wgUseMathJax;
global $wgMathDisableTexFilter, $wgContLang, $wgUseMathJax;
if ( trim( $content ) === "" ) { // bug 8372
return "";
}
@ -51,6 +51,15 @@ class MathHooks {
$renderer = MathRenderer::getRenderer(
$content, $attributes, $mode
);
if ( !$wgMathDisableTexFilter ) {
$checkResult = $renderer->checkTex();
if ( $checkResult !== true ){
// returns the error message
return $renderer->getLastError();
}
}
$renderedMath = $renderer->render();
if ( $wgUseMathJax && $mode == MW_MATH_MATHJAX ) {
$parser->getOutput()->addModules( array( 'ext.math.mathjax.enabler' ) );

View file

@ -132,6 +132,16 @@ $wgLaTeXMLTimeout = 240;
* See http://dlmf.nist.gov/LaTeXML/manual/commands/latexmlpost.xhtml for details.
*/
$wgDefaultLaTeXMLSetting = 'format=xhtml&whatsin=math&whatsout=math&pmml&cmml&nodefaultresources&preload=LaTeX.pool&preload=article.cls&preload=amsmath.sty&preload=amsthm.sty&preload=amstext.sty&preload=amssymb.sty&preload=eucal.sty&preload=[dvipsnames]xcolor.sty&preload=url.sty&preload=hyperref.sty&preload=[ids]latexml.sty&preload=texvc';
/**
* The link to the texvc executable
*/
$wgMathTexvcCheckExecutable = dirname( __FILE__ ) . '/texvccheck/texvccheck';
/**
* Option to disable the tex filter. If set to true any LaTeX espression is parsed
* this can be a potential security risk. If set to false only a subset of the TeX
* commands is allowed. See the wikipedia page Help:Math for details.
*/
$wgMathDisableTexFilter = false;
////////// end of config settings.
$wgDefaultUserOptions['math'] = MW_MATH_PNG;
@ -150,6 +160,8 @@ $wgAutoloadClasses['MathRenderer'] = $dir . 'MathRenderer.php';
$wgAutoloadClasses['MathTexvc'] = $dir . 'MathTexvc.php';
$wgAutoloadClasses['MathSource'] = $dir . 'MathSource.php';
$wgAutoloadClasses['MathLaTeXML'] = $dir . 'MathLaTeXML.php';
$wgAutoloadClasses['MathInputCheck'] = $dir . 'MathInputCheck.php';
$wgAutoloadClasses['MathInputCheckTexvc'] = $dir . 'MathInputCheckTexvc.php';
$wgExtensionMessagesFiles['Math'] = $dir . 'Math.i18n.php';
$wgParserTestFiles[] = $dir . 'mathParserTests.txt';

51
MathInputCheck.php Normal file
View file

@ -0,0 +1,51 @@
<?php
/**
* MediaWiki math extension
*
* (c) 2002-2014 Tomasz Wegrzanowski, Brion Vibber, Moritz Schubotz, and other MediaWiki contributors
* GPLv2 license; info in main package.
*
* @author Moritz Schubotz
*/
abstract class MathInputCheck {
protected $inputTeX;
protected $validTeX;
protected $isValid = false;
protected $lastError = null;
/**
* Default constructor
* (performs no checking)
* @param String $tex the TeX InputString to be checked
*/
public function __construct( $tex = '' ) {
$this->inputTeX = $tex;
$this->isValid = false;
}
/**
* Returns true if the TeX input String is valid
* @return boolean
*/
public function isValid() {
return $this->isValid;
}
/**
* Returns the string of the last error.
* @return string
*/
public function getError() {
return $this->lastError;
}
/**
* Some TeX checking programs may return
* a modified tex string after having checked it.
* You can get the altered tex string with this method
* @return string A valid Tex string
*/
public function getValidTex() {
return $this->validTeX;
}
}

90
MathInputCheckTexvc.php Normal file
View file

@ -0,0 +1,90 @@
<?php
/**
* MediaWiki math extension
*
* (c) 2002-2013 Tomasz Wegrzanowski, Brion Vibber, Moritz Schubotz, and other MediaWiki contributors
* GPLv2 license; info in main package.
*
* @author Moritz Schubotz
*/
class MathInputCheckTexvc extends MathInputCheck {
/**
* Converts an error returned by texvc to a localized exception
*
* @param string $texvcResult error result returned by texvc
*/
public function convertTexvcError( $texvcResult, $errorRenderer = false ) {
$texvcStatus = substr( $texvcResult, 0, 1 );
$errDetails = htmlspecialchars( substr( $texvcResult, 1 ) );
if ( $errorRenderer === false ) {
$errorRenderer = new MathSource( $this->inputTeX );
}
switch ($texvcStatus) {
case 'E':
$errMsg = $errorRenderer->getError( 'math_lexing_error' );
break;
case 'S':
$errMsg = $errorRenderer->getError( 'math_syntax_error' );
break;
case 'F':
$errMsg = $errorRenderer->getError( 'math_unknown_function', $errDetails );
break;
default:
$errMsg = $errorRenderer->getError( 'math_unknown_error' );
}
return $errMsg;
}
/**
*
* @global type $wgTexvc
* @return boolean
*/
public function isValid() {
global $wgMathTexvcCheckExecutable;
if ( !is_executable( $wgMathTexvcCheckExecutable ) ) {
$msg = wfMessage( 'math_notexvc' )->inContentLanguage()->escaped();
trigger_error( $msg, E_USER_NOTICE );
wfDebugLog( 'Math', $msg );
return true;
}
$cmd = $wgMathTexvcCheckExecutable . ' ' . wfEscapeShellArg( $this->inputTeX );
if ( wfIsWindows() ) {
# Invoke it within cygwin sh, because texvc expects sh features in its default shell
$cmd = 'sh -c ' . wfEscapeShellArg($cmd);
}
wfDebugLog( 'Math', "TeX check command: $cmd\n" );
$contents = wfShellExec( $cmd );
wfDebugLog( 'Math', "TeX check result:\n $contents\n---\n" );
if ( strlen($contents) === 0 ) {
wfDebugLog( 'Math', "TeX check output was empty. \n" );
$this->lastError = MathRenderer::getError( 'math_unknown_error' );
return false;
}
$retval = substr( $contents, 0, 1 );
if ( $retval !== '+' ) {
$this->lastError = $this->convertTexvcError( $contents );
wfDebugLog( 'Math', 'checkTex failed:' . $this->lastError );
return false;
} else {
$this->validTeX = substr( $contents, 1 );
$this->isSecure = true;
wfDebugLog( 'Math', 'checkTex successful tex is now: ' . $this->validTeX );
return true;
}
}
}

View file

@ -27,6 +27,8 @@ abstract class MathRenderer {
*/
protected $mode = MW_MATH_PNG;
protected $tex = '';
/** @var string the original user input string (which was used to caculate the inputhash) */
protected $userInputTex = '';
/**
* is calculated by texvc.
* @var string
@ -36,6 +38,9 @@ abstract class MathRenderer {
protected $mathml = '';
protected $conservativeness = 0;
protected $params = '';
//STATE OF THE CLASS INSTANCE
/** @var boolean has variable tex been security-checked */
protected $texSecure = false;
protected $changed = false;
/**
* @var boolean forces rerendering if set to true
@ -51,6 +56,7 @@ abstract class MathRenderer {
* @param array $params (optional) HTML attributes
*/
public function __construct( $tex = '', $params = array() ) {
$this->userInputTex = $tex;
$this->tex = $tex;
$this->params = $params;
}
@ -114,7 +120,7 @@ abstract class MathRenderer {
* @param Varargs $parameters (optional) zero or more message parameters for specific error
* @return string HTML error string
*/
protected function getError( $msg /*, ... */ ) {
public function getError( $msg /*, ... */ ) {
$mf = wfMessage( 'math_failure' )->inContentLanguage()->escaped();
$parameters = func_get_args();
array_shift( $parameters );
@ -415,5 +421,27 @@ abstract class MathRenderer {
function getLastError() {
return $this->lastError;
}
/**
* Get if the input tex was marked as secure
* @return boolean
*/
public function isTexSecure() {
return $this->texSecure;
}
public function checkTex() {
if ( !$this->texSecure ) {
$checker = new MathInputCheckTexvc( $this->userInputTex );
if ( $checker->isValid() ){
$this->setTex( $checker->getValidTex() );
$this->texSecure = true;
return true;
} else {
$this->lastError = $checker->getError();
return false;
}
}
}
}

View file

@ -100,24 +100,8 @@ class MathTexvc extends MathRenderer {
* @param string $texvcResult error result returned by texvc
*/
public function convertTexvcError( $texvcResult ) {
$texvcStatus = substr( $texvcResult, 0, 1 );
$errDetails = htmlspecialchars( substr( $texvcResult, 1 ) );
switch( $texvcStatus ) {
case 'E':
$errMsg = $this->getError( 'math_lexing_error' );
break;
case 'S':
$errMsg = $this->getError( 'math_syntax_error' );
break;
case 'F':
$errMsg = $this->getError( 'math_unknown_function', $errDetails );
break;
default:
$errMsg = $this->getError( 'math_unknown_error' );
}
return $errMsg;
$errorConverter = new MathInputCheckTexvc();
return $errorConverter->convertTexvcError( $texvcResult, $this );
}
/**
@ -364,4 +348,4 @@ class MathTexvc extends MathRenderer {
return false;
}
}
}

View file

@ -0,0 +1,33 @@
<?php
/**
* Generated by PHPUnit_SkeletonGenerator 1.2.1 on 2013-09-24 at 03:36:12.
*/
class MathInputCheckTest extends MediaWikiTestCase
{
/**
* @covers MathInputCheck::isValid
*/
public function testIsValid() {
$InputCheck = $this->getMockBuilder( 'MathInputCheck' )->getMock();
$this->assertEquals( $InputCheck->IsValid() , false );
}
/**
* @covers MathInputCheck::getError
* @todo Implement testGetError().
*/
public function testGetError() {
$InputCheck = $this->getMockBuilder( 'MathInputCheck' )->getMock();
$this->assertNull( $InputCheck->getError() );
}
/**
* @covers MathInputCheck::getValidTex
*/
public function testGetValidTex() {
$InputCheck = $this->getMockBuilder( 'MathInputCheck' )
->setConstructorArgs(array('some tex input'))
->getMock();
$this->assertNull( $InputCheck->getValidTex() );
}
}

View file

@ -0,0 +1,141 @@
<?php
/**
* Generated by PHPUnit_SkeletonGenerator 1.2.1 on 2013-09-24 at 03:46:07.
*/
class MathInputCheckTexvcTest extends MediaWikiTestCase {
/**
* @var MathInputCheckTexvc
*/
protected $BadObject;
protected $GoodObject;
/**
* Sets up the fixture, for example, opens a network connection.
* This method is called before a test is executed.
*/
protected function setUp() {
global $wgMathTexvcCheckExecutable;
parent::setUp();
$this->BadObject = new MathInputCheckTexvc( '\newcommand{\text{do evil things}}' );
$this->GoodObject = new MathInputCheckTexvc( '\sin\left(\frac12x\right)' );
if ( !is_executable( $wgMathTexvcCheckExecutable ) ) {
$this->markTestSkipped( "No texvc installed on server" );
}
}
/**
* This is not a real phpUnit test.
* It's just to discover whether setting default values dependent
* on the existence of executables influences the performance reaonably.
* The test is disabled by default. You can enable it via
* php .../tests/phpunit/phpunit.php --group Utility ...
* @group Utility
*/
public function testPerformanceIsExecutable() {
global $wgMathTexvcCheckExecutable;
/** @var int the number of runs used in that test */
$numberOfRuns = 10;
/** @var double the maximal average time accetable for a execution of is_executable in seconds*/
$maxAvgTime = .001;
$tstart = microtime( true );
for ($i = 1; $i <= $numberOfRuns; $i++){
is_executable( $wgMathTexvcCheckExecutable );
}
$time = microtime( true ) - $tstart;
$this->assertTrue( $time < $maxAvgTime * $numberOfRuns, 'function is_executable consumes too much time' );
}
/**
* Tears down the fixture, for example, closes a network connection.
* This method is called after a test is executed.
*/
protected function tearDown() {
}
/**
* @covers MathInputCheckTexvc::testGetError
*/
public function testGetError() {
$this->assertNull( $this->GoodObject->getError() );
$this->assertNull( $this->BadObject->getError() );
$this->BadObject->isValid();
$this->GoodObject->isValid();
$this->assertNull( $this->GoodObject->getError() );
$expectedMessage = wfMessage( 'math_unknown_function', '\newcommand' )->inContentLanguage()->escaped();
$this->assertContains( $expectedMessage , $this->BadObject->getError());
}
/**
* @covers MathInputCheckTexvc::isValid
*/
public function testIsValid() {
$this->assertFalse( $this->BadObject->isValid() );
$this->assertTrue( $this->GoodObject->isValid() );
}
/**
* @covers MathInputCheckTexvc::getValidTex
*/
public function testGetValidTex() {
$this->assertNull( $this->GoodObject->getValidTex() );
$this->assertNull( $this->BadObject->getValidTex() );
$this->BadObject->isValid();
$this->GoodObject->isValid();
$this->assertNull( $this->BadObject->getValidTex() );
// Be aware of the additional brackets and spaces inserted here
$this->assertEquals( $this->GoodObject->getValidTex() , "\\sin \\left({\\frac 12}x\\right)" );
}
/**
* Test corner cases of texvccheck conversion
* @covers MathInputCheckTexvc::getValidTex
*/
public function testGetValidTexCornerCases() {
$Object = new MathInputCheckTexvc( '\reals' );
$Object->isValid();
$this->assertEquals( "\\mathbb{R} ", $Object->getValidTex() );
$Object = new MathInputCheckTexvc( '\lbrack' ); // Bug: 54624
$Object->isValid();
$this->assertEquals( '\\lbrack ', $Object->getValidTex() );
}
/**
* Tests behavior of convertTexvcError
* The method was moved from MathTexvc to MathInputCheckTexvc
* @covers MathTexvc::convertTexvcError
*/
public function testConvertTexvcError() {
$texvc = $this->getMockBuilder( 'MathInputCheckTexvc' )
->setMethods(NULL)
->disableOriginalConstructor()
->getMock();
$mathFailure = wfMessage( 'math_failure' )->inContentLanguage()->escaped();
$actualLexing = $texvc->convertTexvcError( 'E' );
$expectedLexing = wfMessage( 'math_lexing_error', '' )->inContentLanguage()->escaped();
$this->assertContains( $mathFailure, $actualLexing, 'Lexing error contains general math failure message' );
$this->assertContains( $expectedLexing, $actualLexing, 'Lexing error contains detailed error for lexing' );
$actualSyntax = $texvc->convertTexvcError( 'S' );
$expectedSyntax = wfMessage( 'math_syntax_error', '' )->inContentLanguage()->escaped();
$this->assertContains( $mathFailure, $actualSyntax, 'Syntax error contains general math failure message' );
$this->assertContains( $expectedSyntax, $actualSyntax, 'Syntax error contains detailed error for syntax' );
$unknownFunction = 'figureEightIntegral';
$actualUnknownFunction = $texvc->convertTexvcError( "F$unknownFunction" );
$expectedUnknownFunction = wfMessage( 'math_unknown_function', $unknownFunction )->inContentLanguage()->escaped();
$this->assertContains( $mathFailure, $actualUnknownFunction, 'Unknown function error contains general math failure message' );
$this->assertContains( $expectedUnknownFunction, $actualUnknownFunction, 'Unknown function error contains detailed error for unknown function' );
$actualUnknownError = $texvc->convertTexvcError( 'Q' );
$expectedUnknownError = wfMessage( 'math_unknown_error', '' )->inContentLanguage()->escaped();
$this->assertContains( $mathFailure, $actualUnknownError, 'Unknown error contains general math failure message' );
$this->assertContains( $expectedUnknownError, $actualUnknownError, 'Unknown error contains detailed error for unknownError' );
}
}