<?php
// Scraped from: mediawiki-extensions-AbuseF.../tests/phpunit/AbuseFilterParserTest.php (984 lines, 28 KiB, PHP)
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*
* @license GPL-2.0-or-later
* @author Marius Hoch < hoo@online.de >
*/
use Psr\Log\NullLogger;
/**
* @group Test
* @group AbuseFilter
* @group AbuseFilterParser
*
* @covers AbuseFilterCachingParser
* @covers AFPTreeParser
* @covers AFPTreeNode
* @covers AFPParserState
* @covers AbuseFilterParser
* @covers AbuseFilterTokenizer
* @covers AFPToken
* @covers AFPUserVisibleException
* @covers AFPException
* @covers AFPData
* @covers AbuseFilterVariableHolder
* @covers AFComputedVariable
*/
class AbuseFilterParserTest extends AbuseFilterParserTestCase {
/**
 * Feed a rule to every available parser and check that each of them reports a match.
 *
 * @param string $rule The rule to parse
 * @dataProvider readTests
 */
public function testParser( $rule ) {
	foreach ( $this->getParsers() as $currentParser ) {
		$matched = $currentParser->parse( $rule );
		$parserName = get_class( $currentParser );
		// Name the parser in the message so a failure can be attributed to it
		$this->assertTrue( $matched, 'Parser used: ' . $parserName );
	}
}
/**
 * Data provider for testParser.
 *
 * Yields the trimmed content of every `*.t` file found in the sibling
 * `parserTests` directory, keyed by the file name without its `.t` suffix.
 *
 * @return Generator|array
 * @throws RuntimeException If the directory cannot be listed or a file cannot be read
 */
public function readTests() {
	$testPath = __DIR__ . '/../parserTests';
	$testFiles = glob( $testPath . '/*.t' );
	if ( $testFiles === false ) {
		// glob() returns false on error; fail loudly instead of iterating over a boolean
		throw new RuntimeException( "Unable to list parser test files in $testPath" );
	}
	foreach ( $testFiles as $testFile ) {
		// Every match ends in ".t", so dropping that suffix leaves the test name
		$testName = basename( $testFile, '.t' );
		$content = file_get_contents( $testFile );
		if ( $content === false ) {
			throw new RuntimeException( "Unable to read parser test file $testFile" );
		}
		yield $testName => [ trim( $content ) ];
	}
}
/**
 * Test expression evaluation: every parser must evaluate the expression
 * to a value equal to the expected one.
 *
 * @param string $expr The expression to evaluate
 * @param mixed $expected The expected result (the provider supplies strings and booleans)
 * @dataProvider provideExpressions
 */
public function testEvaluateExpression( $expr, $expected ) {
	foreach ( $this->getParsers() as $parser ) {
		$actual = $parser->evaluateExpression( $expr );
		// Name the parser so that a failure can be attributed to one implementation
		$this->assertEquals( $expected, $actual, 'Parser used: ' . get_class( $parser ) );
	}
}
/**
 * Data provider for testEvaluateExpression.
 * Each case is an expression followed by the value it is expected to evaluate to.
 *
 * @return array
 */
public function provideExpressions() {
	$cases = [
		[ '1 === 1', true ],
		[ 'rescape( "abc* (def)" )', 'abc\* \(def\)' ],
		[ 'str_replace( "foobarbaz", "bar", "-" )', 'foo-baz' ],
		[ 'rmdoubles( "foobybboo" )', 'fobybo' ],
		[ 'lcase("FÁmí")', 'fámí' ],
		[ 'substr( "foobar", 0, 3 )', 'foo' ],
	];
	return $cases;
}
/**
 * Test empty (or almost empty) syntax and ensure that no parser reports a match for it.
 *
 * @param string $code
 * @dataProvider provideEmptySyntax
 */
public function testEmptySyntax( $code ) {
	foreach ( $this->getParsers() as $parser ) {
		// Name the parser so that a failure can be attributed to one implementation
		$this->assertFalse( $parser->parse( $code ), 'Parser used: ' . get_class( $parser ) );
	}
}
/**
 * Data provider for testEmptySyntax: pieces of code with no actual content.
 *
 * @return array
 */
public function provideEmptySyntax() {
	$emptyCodes = [
		[ '' ],
		[ '()' ],
		[ ';;;;' ],
	];
	return $emptyCodes;
}
/**
 * Ensure that AbuseFilterTokenizer::OPERATOR_RE matches the contents
 * and order of AbuseFilterTokenizer::$operators.
 *
 * The expected regex is rebuilt here as an anchored alternation of the
 * quoted operators, in the order in which they are listed.
 */
public function testOperatorRe() {
	$escapedOperators = [];
	foreach ( AbuseFilterTokenizer::$operators as $operator ) {
		$escapedOperators[] = preg_quote( $operator, '/' );
	}
	$alternation = implode( '|', $escapedOperators );
	$expectedRe = '/(' . $alternation . ')/A';
	$this->assertEquals( $expectedRe, AbuseFilterTokenizer::OPERATOR_RE );
}
/**
 * Ensure that AbuseFilterTokenizer::RADIX_RE matches the contents
 * and order of AbuseFilterTokenizer::$bases.
 *
 * The keys of $bases are concatenated to form the character class
 * used for the optional base suffix in the expected regex.
 */
public function testRadixRe() {
	$baseChars = implode( '', array_keys( AbuseFilterTokenizer::$bases ) );
	$expectedRe = '/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([' . $baseChars . '])?(?![a-z])/Au';
	$this->assertEquals( $expectedRe, AbuseFilterTokenizer::RADIX_RE );
}
/**
 * Ensure the number of conditions counted for given expressions is right.
 *
 * The count is measured as the difference between the parser's condition
 * counter after and before parsing the rule.
 *
 * @param string $rule The rule to parse
 * @param int $expected The expected amount of used conditions
 * @dataProvider condCountCases
 */
public function testCondCount( $rule, $expected ) {
	foreach ( $this->getParsers() as $parser ) {
		$initialCount = $parser->getCondCount();
		$parser->parse( $rule );
		$usedConditions = $parser->getCondCount() - $initialCount;
		$this->assertEquals(
			$expected,
			$usedConditions,
			"Wrong condition count for $rule with " . get_class( $parser )
		);
	}
}
/**
 * Data provider for testCondCount.
 * Each case is a rule followed by the number of conditions it is expected to use.
 *
 * @return array
 */
public function condCountCases() {
	$cases = [
		[ '((("a" == "b")))', 1 ],
		[ 'contains_any("a", "b", "c")', 1 ],
		[ '"a" == "b" & "b" == "c"', 1 ],
		[ '"a" == "b" | "b" == "c"', 2 ],
		[ '"a" in "b" + "c" in "d" + "e" in "f"', 3 ],
		[ 'true', 0 ],
		[ '"a" == "a" | "c" == "d"', 1 ],
		[ '"a" == "b" & "c" == "d"', 1 ],
		[ '1 = 0 & 2 * 3 * 4 <= 560 & "a" = "b"', 1 ],
		[ '1 = 1 & 2 * 3 * 4 <= 560 & "a" = "b"', 3 ],
		[ '1 = 1 | 2 * 3 * 4 <= 560 | "a" = "b"', 1 ],
		[ '1 = 0 | 2 * 3 * 4 <= 560 | "a" = "b"', 2 ],
	];
	return $cases;
}
/**
 * Test for T204841: a filter accessing array elements on both sides of
 * boolean operators must be parsed by every parser without matching.
 */
public function testArrayShortcircuit() {
	$declarations = 'a := [false, false]; b := [false, false]; c := 42; d := [0,1];';
	$condition =
		'a[0] != false & b[1] != false & (b[5**2/(5*(4+1))] !== a[43-c] | a[d[0]] === b[d[c-41]])';
	$code = $declarations . $condition;
	foreach ( $this->getParsers() as $parser ) {
		$matched = $parser->parse( $code );
		$this->assertFalse( $matched, 'Parser: ' . get_class( $parser ) );
	}
}
/**
 * Test the 'expectednotfound' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider expectedNotFound
 */
public function testExpectedNotFoundException( $expr, $caller ) {
	$exceptionId = 'expectednotfound';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testExpectedNotFoundException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function expectedNotFound() {
	$cases = [
		[ 'a:= [1,2,3]; a[1 = 4', 'doLevelSet' ],
		[ "if 1 = 1 'foo'", 'doLevelConditions' ],
		[ "if 1 = 1 then 'foo'", 'doLevelConditions' ],
		[ "if 1 = 1 then 'foo' else 'bar'", 'doLevelConditions' ],
		[ "a := 1 = 1 ? 'foo'", 'doLevelConditions' ],
		[ '(1 = 1', 'doLevelBraces' ],
		[ 'lcase = 3', 'doLevelFunction' ],
		[ 'lcase( 3 = 1', 'doLevelFunction' ],
		[ 'a := [1,2', 'doLevelAtom' ],
		[ '1 = 1 | (', 'skipOverBraces' ],
		[ 'a := [1,2,3]; 3 = a[5', 'doLevelArrayElements' ],
		// `set` is the name of a function. It's unclear whether the following should be allowed.
		[ 'set:= 1; set contains 1', 'doLevelFunction' ],
	];
	return $cases;
}
/**
 * Test the 'unexpectedatend' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider unexpectedAtEnd
 */
public function testUnexpectedAtEndException( $expr, $caller ) {
	$exceptionId = 'unexpectedatend';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testUnexpectedAtEndException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function unexpectedAtEnd() {
	$cases = [
		[ "'a' = 1 )", 'doLevelEntry' ],
	];
	return $cases;
}
/**
 * Test the 'unrecognisedvar' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider unrecognisedVar
 */
public function testUnrecognisedVarException( $expr, $caller ) {
	$exceptionId = 'unrecognisedvar';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testUnrecognisedVarException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function unrecognisedVar() {
	$cases = [
		[ 'a[1] := 5', 'doLevelSet' ],
		[ 'a[] := 5', 'doLevelSet' ],
		[ 'a = 5', 'getVarValue' ],
		[ 'false & ( nonexistent[1] := 2 )', 'skipOverBraces/discardWithHoisting' ],
		[ 'false & ( nonexistent[] := 2 )', 'skipOverBraces/discardWithHoisting' ],
	];
	return $cases;
}
/**
 * Test the 'notarray' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider notArray
 */
public function testNotArrayException( $expr, $caller ) {
	$exceptionId = 'notarray';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testNotArrayException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function notArray() {
	$cases = [
		[ 'a := 5; a[1] = 5', 'doLevelSet' ],
		[ 'a := 1; 3 = a[5]', 'doLevelArrayElements' ],
		[ 'a := 2; a[] := 2', '[different callers]' ],
		[ 'a := 3; a[3] := 5', '[different callers]' ],
	];
	return $cases;
}
/**
 * Test the 'outofbounds' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider outOfBounds
 */
public function testOutOfBoundsException( $expr, $caller ) {
	$exceptionId = 'outofbounds';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testOutOfBoundsException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function outOfBounds() {
	$cases = [
		[ 'a := [2]; a[5] = 9', 'doLevelSet' ],
		[ 'a := [1,2,3]; 3 = a[5]', 'doLevelArrayElements' ],
		[ 'a := [1]; a[15] := 5', '[different callers]' ],
	];
	return $cases;
}
/**
 * Test the 'unrecognisedkeyword' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider unrecognisedKeyword
 */
public function testUnrecognisedKeywordException( $expr, $caller ) {
	$exceptionId = 'unrecognisedkeyword';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testUnrecognisedKeywordException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function unrecognisedKeyword() {
	$cases = [
		[ '5 = rlike', 'doLevelAtom' ],
	];
	return $cases;
}
/**
 * Test the 'unexpectedtoken' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider unexpectedToken
 */
public function testUnexpectedTokenException( $expr, $caller ) {
	$exceptionId = 'unexpectedtoken';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testUnexpectedTokenException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function unexpectedToken() {
	$cases = [
		[ '1 =? 1', 'doLevelAtom' ],
	];
	return $cases;
}
/**
 * Test the 'disabledvar' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider disabledVar
 */
public function testDisabledVarException( $expr, $caller ) {
	$exceptionId = 'disabledvar';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testDisabledVarException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function disabledVar() {
	$cases = [
		[ 'old_text = 1', 'getVarValue' ],
	];
	return $cases;
}
/**
 * Test the 'variablevariable' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider variableVariable
 */
public function testVariableVariableException( $expr, $caller ) {
	$exceptionId = 'variablevariable';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testVariableVariableException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function variableVariable() {
	$cases = [
		[ "set( 'x' + 'y', 1 )", 'doLevelFunction' ],
		[ "set( 'x' + page_title, 1 )", 'doLevelFunction' ],
		[ "set( page_title, 1 )", 'doLevelFunction' ],
		// NOTE(review): the outer `set(` below is never closed; presumably the exception
		// is raised before the parser reaches the end of the call - confirm.
		[ "set( page_title + 'x' + ( page_namespace == 0 ? 'x' : 'y' )", 'doLevelFunction' ],
	];
	return $cases;
}
/**
 * Test the 'overridebuiltin' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider overrideBuiltin
 */
public function testOverrideBuiltinException( $expr, $caller ) {
	$exceptionId = 'overridebuiltin';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testOverrideBuiltinException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function overrideBuiltin() {
	$cases = [
		[ 'added_lines := 1', 'setUserVariable' ],
		[ 'added_lines[] := 1', 'doLevelSet' ],
		[ 'added_lines[3] := 1', 'doLevelSet' ],
		[ 'page_id[3] := 1', 'doLevelSet' ],
	];
	return $cases;
}
/**
 * Test the 'regexfailure' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider regexFailure
 */
public function testRegexFailureException( $expr, $caller ) {
	$exceptionId = 'regexfailure';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testRegexFailureException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function regexFailure() {
	$cases = [
		[ "rcount('(','a')", 'funcRCount' ],
		[ "get_matches('this (should fail', 'any haystack')", 'funcGetMatches' ],
		[ "'a' rlike '('", 'keywordRegex' ],
	];
	return $cases;
}
/**
 * Test the 'invalidiprange' exception
 *
 * @param string $expr The expression to test
 * @param string $caller The function where the exception is thrown
 * @dataProvider invalidIPRange
 */
public function testInvalidIPRangeException( $expr, $caller ) {
	$exceptionId = 'invalidiprange';
	$this->exceptionTest( $exceptionId, $expr, $caller );
}
/**
 * Data provider for testInvalidIPRangeException.
 * Each case is an expression followed by the name of the function where
 * the exception is raised. One expression for each throw.
 *
 * @return array
 */
public function invalidIPRange() {
	$cases = [
		[ "ip_in_range('0.0.0.0', 'lol')", 'funcIPInRange' ],
	];
	return $cases;
}
/**
 * Test functions which require a parameter by calling them with no
 * parameters at all. They should throw a 'noparams' exception.
 *
 * @param string $func The function to test
 * @dataProvider oneParamFuncs
 */
public function testNoParamsException( $func ) {
	// Call the function with an empty argument list
	$call = $func . '()';
	$this->exceptionTest( 'noparams', $call, 'checkArgCount' );
}
/**
 * Data provider for testNoParamsException, returns a list of
 * functions taking a single parameter.
 *
 * @return array
 */
public function oneParamFuncs() {
	$functions = [
		[ 'lcase' ],
		[ 'ucase' ],
		[ 'length' ],
		[ 'strlen' ],
		[ 'specialratio' ],
		[ 'count' ],
		[ 'rcount' ],
		[ 'ccnorm' ],
		[ 'sanitize' ],
		[ 'rmspecials' ],
		[ 'rmwhitespace' ],
		[ 'rmdoubles' ],
		[ 'norm' ],
		[ 'rescape' ],
		[ 'string' ],
		[ 'int' ],
		[ 'float' ],
		[ 'bool' ],
	];
	return $functions;
}
/**
 * Test functions taking two parameters by providing only one.
 * They should throw a 'notenoughargs' exception.
 *
 * @param string $func The function to test
 * @dataProvider twoParamsFuncs
 */
public function testNotEnoughArgsExceptionTwo( $func ) {
	// It doesn't matter whether the function accepts a string argument, since
	// the amount of parameters is checked before anything else.
	$code = "$func('foo')";
	$this->exceptionTest( 'notenoughargs', $code, 'checkArgCount' );
}
/**
 * Data provider for testNotEnoughArgsExceptionTwo, returns the list of
 * functions taking two parameters.
 *
 * @return array
 */
public function twoParamsFuncs() {
	$functions = [
		[ 'get_matches' ],
		[ 'ip_in_range' ],
		[ 'contains_any' ],
		[ 'contains_all' ],
		[ 'ccnorm_contains_any' ],
		[ 'ccnorm_contains_all' ],
		[ 'equals_to_any' ],
		[ 'substr' ],
		[ 'strpos' ],
		[ 'set_var' ],
	];
	return $functions;
}
/**
 * Test functions taking three parameters by providing only two.
 * They should throw a 'notenoughargs' exception.
 *
 * @param string $func The function to test
 * @dataProvider threeParamsFuncs
 */
public function testNotEnoughArgsExceptionThree( $func ) {
	// It doesn't matter whether the function accepts string arguments, since
	// the amount of parameters is checked before anything else.
	$code = "$func('foo', 'bar')";
	$this->exceptionTest( 'notenoughargs', $code, 'checkArgCount' );
}
/**
 * Data provider for testNotEnoughArgsExceptionThree, returns the list of
 * functions taking three parameters.
 *
 * @return array
 */
public function threeParamsFuncs() {
	$functions = [
		[ 'str_replace' ],
	];
	return $functions;
}
/**
 * Test the 'toomanyargs' exception for function calls with more arguments
 * than expected. The test is currently marked as skipped.
 *
 * @param string $code
 * @dataProvider tooManyArgsFuncs
 */
public function testTooManyArgumentsException( $code ) {
	$skipReason = 'Waiting for filters to be fixed in WMF production';
	// NOTE(review): markTestSkipped() presumably interrupts the test, so the check
	// below would only run once the skip is removed - confirm against PHPUnit.
	$this->markTestSkipped( $skipReason );
	$this->exceptionTest( 'toomanyargs', $code, 'checkArgCount' );
}
/**
 * Data provider for testTooManyArgumentsException: function calls
 * with several arguments each.
 *
 * @return array
 */
public function tooManyArgsFuncs() {
	$calls = [
		[ "lcase( 'a', 'b' )" ],
		[ "norm( 'a', 'b', 'c' )" ],
		[ "count( 'a', 'b', 'c' )" ],
		[ "ip_in_range( 'a', 'b', 'c' )" ],
		[ "substr( 'a', 'b', 'c', 'd' )" ],
		[ "str_replace( 'a', 'b', 'c', 'd', 'e' )" ],
	];
	return $calls;
}
/**
 * Check that variadic functions can be called with a large number of
 * arguments (51 here) without any parser throwing an exception.
 *
 * @param string $func
 * @dataProvider variadicFuncs
 */
public function testVariadicFuncsArbitraryArgsAllowed( $func ) {
	$argsList = str_repeat( ', "arg"', 50 );
	$code = "$func( 'arg' $argsList )";
	// Use $this-> like every other test in this class; it works whether
	// getParsers() is declared static or not.
	foreach ( $this->getParsers() as $parser ) {
		$pname = get_class( $parser );
		try {
			$parser->parse( $code );
			// Reaching this point without an exception is the actual check
			$this->assertTrue( true );
		} catch ( AFPException $e ) {
			$this->fail( "Got exception with parser $pname.\n$e" );
		}
	}
}
/**
 * Data provider for testVariadicFuncsArbitraryArgsAllowed: names of
 * the functions to call with an arbitrary amount of arguments.
 *
 * @return array
 */
public function variadicFuncs() {
	$functions = [
		[ 'contains_any' ],
		[ 'contains_all' ],
		[ 'equals_to_any' ],
	];
	return $functions;
}
/**
 * Check that calling a function with less arguments than required throws an exception
 * when inside a skipped conditional branch.
 *
 * The function call is placed in the `else` branch of an `if` whose condition is
 * `1==1`. Only AbuseFilterCachingParser is exercised here.
 *
 * @param string $funcCode Code for a function call
 * @param string $exceptionCode The ID of the expected exception
 * @dataProvider provideFuncsForConditional
 */
public function testCheckArgCountInConditional( $funcCode, $exceptionCode ) {
	$code = "if ( 1==1 ) then ( 1 ) else ( $funcCode ) end;";
	// AbuseFilterParser skips the parentheses altogether, so this is not supposed to work
	$parser = new AbuseFilterCachingParser(
		new LanguageEn(),
		new EmptyBagOStuff(),
		new NullLogger()
	);
	$parser->toggleConditionLimit( false );
	try {
		$parser->parse( $code );
		$this->fail( 'No exception was thrown.' );
	} catch ( AFPUserVisibleException $e ) {
		// Compare the exception ID rather than its message
		$this->assertSame( $exceptionCode, $e->mExceptionID );
	}
}
/**
 * Data provider for testCheckArgCountInConditional.
 * Each case is a function call followed by the ID of the exception it should cause.
 *
 * @return array
 */
public function provideFuncsForConditional() {
	$cases = [
		[ 'count()', 'noparams' ],
		[ 'bool()', 'noparams' ],
		[ 'ip_in_range(1)', 'notenoughargs' ],
		[ 'set_var("x")', 'notenoughargs' ],
		[ 'str_replace("x","y")', 'notenoughargs' ],
	];
	return $cases;
}
/**
 * Check that deprecated variables are correctly translated to the new ones with a debug notice
 *
 * For each parser, the comparison `<old> === <new>` must match, and the logger
 * must have collected an entry mentioning the deprecated variable.
 *
 * @param string $old The old name of the variable
 * @param string $new The new name of the variable
 * @dataProvider provideDeprecatedVars
 */
public function testDeprecatedVars( $old, $new ) {
	// The value is only stored under the new name; the old name has to resolve to it
	$vars = AbuseFilterVariableHolder::newFromArray( [ $new => 'value' ] );
	foreach ( $this->getParsers() as $parser ) {
		$pname = get_class( $parser );

		$logger = new TestLogger();
		$logger->setCollect( true );
		$parser->setLogger( $logger );
		$parser->setVariables( $vars );
		$matched = $parser->parse( "$old === $new" );

		// Look for the deprecation notice among the collected log entries
		$wasLogged = false;
		foreach ( $logger->getBuffer() as $logEntry ) {
			if ( preg_match( '/AbuseFilter: deprecated variable/', $logEntry[1] ) ) {
				$wasLogged = true;
				break;
			}
		}
		if ( !$wasLogged ) {
			$this->fail( "The use of the deprecated variable $old was not logged. Parser: $pname" );
		}

		$this->assertTrue( $matched, "Parser: $pname" );
	}
}
/**
 * Data provider for testDeprecatedVars.
 * Yields one case per entry of AbuseFilter::$deprecatedVars, keyed by the old name.
 *
 * @return Generator|array
 */
public function provideDeprecatedVars() {
	foreach ( AbuseFilter::$deprecatedVars as $oldName => $newName ) {
		yield $oldName => [ $oldName, $newName ];
	}
}
/**
 * Ensure that things like `'a' === 'b' === 'c'` or `1 < 2 < 3` are rejected, while `1 < 2 == 3`
 * and `1 == 2 < 3` are not. (T218906)
 *
 * Code counts as rejected when parsing it throws an AFPUserVisibleException.
 *
 * @param string $code Code to parse
 * @param bool $valid Whether $code is valid (or should throw an exception)
 * @dataProvider provideConsecutiveComparisons
 */
public function testDisallowConsecutiveComparisons( $code, $valid ) {
	foreach ( $this->getParsers() as $parser ) {
		$pname = get_class( $parser );
		try {
			$parser->parse( $code );
			$parsedWithoutError = true;
		} catch ( AFPUserVisibleException $e ) {
			$parsedWithoutError = false;
		}
		$negation = $valid ? ' ' : ' NOT ';
		$this->assertSame(
			$valid,
			$parsedWithoutError,
			'The code should' . $negation . "be parsed correctly. Parser: $pname"
		);
	}
}
/**
 * Data provider for testDisallowConsecutiveComparisons.
 *
 * Yields every pair of comparison operators applied in sequence, plus a few
 * longer chains. A pair is flagged as valid only when it mixes one equality
 * operator with one ordering operator; all the longer chains are flagged invalid.
 *
 * @return Generator
 */
public function provideConsecutiveComparisons() {
	// Same as AbuseFilterParser::doLevelCompares
	$eqOps = [ '==', '===', '!=', '!==', '=' ];
	$ordOps = [ '<', '>', '<=', '>=' ];
	$ops = array_merge( $eqOps, $ordOps );
	foreach ( $ops as $op1 ) {
		// Strict lookup, done once per outer iteration
		$firstIsEq = in_array( $op1, $eqOps, true );
		foreach ( $ops as $op2 ) {
			$secondIsEq = in_array( $op2, $eqOps, true );
			$testStr = "1 $op1 3.14 $op2 -1";
			// Every operator is either an equality or an ordering one, so the pair
			// is mixed exactly when the two flags differ.
			$valid = $firstIsEq !== $secondIsEq;
			yield $testStr => [ $testStr, $valid ];
		}
	}
	// Some more cases with more than 2 comparisons
	$extra = [
		'1 === 1 < 3 === 0',
		'1 === 1 < 3 === 0 < 555',
		'1 < 3 === 0 < 555',
		'1 < 3 === 0 < 555 !== 444',
		'1 != 0 < 3 == 1 > 0 != 0',
	];
	foreach ( $extra as $case ) {
		yield $case => [ $case, false ];
	}
}
/**
 * Test that code declaring a variable in a skipped brace (because of shortcircuit)
 * will be parsed without throwing an exception when later trying to use that var. T214674
 *
 * Besides not throwing, the code must not match with any parser.
 *
 * @param string $code Code to parse
 * @dataProvider provideVarDeclarationInSkippedBlock
 */
public function testVarDeclarationInSkippedBlock( $code ) {
	foreach ( $this->getParsers() as $parser ) {
		$pname = get_class( $parser );
		try {
			$matched = $parser->parse( $code );
		} catch ( AFPException $e ) {
			// fail() interrupts the test, so $matched is always set below
			$this->fail( "Got exception with parser: $pname\n$e" );
		}
		$this->assertFalse( $matched, "Parser: $pname" );
	}
}
/**
 * Data provider for testVarDeclarationInSkippedBlock.
 * Each case is a piece of code declaring a variable inside a block
 * which is not executed.
 *
 * @return array
 */
public function provideVarDeclarationInSkippedBlock() {
	$cases = [
		[ "x := [5]; false & (1 == 1; y := 'b'; x[1] := 'x'; 3 < 4); y != 'b' & x[1] != 'x'" ],
		[ "(var := [1]); false & ( var[] := 'baz' ); var contains 'baz'" ],
		[ "(var := [1]); false & ( var[1] := 'baz' ); var[1] === 'baz'" ],
		[ "false & (set('myvar', 1)); myvar contains 1" ],
		// The following tests are to ensure that we don't get a match
		[ "false & ( var := 'foo'; x := get_matches( var, added_lines )[1] ); x != false" ],
		[ "false & ( var := 'foo'); var !== null" ],
		[ "false & ( var := 'foo'); var === null" ],
		[ "false & (set('myvar', 'foo')); myvar === 'foo' | myvar !== 'foo'" ],
		[ "false & ( var := 'foo'); var[0] !== 123456" ],
		[ "false & ( var := 'foo'); var[0][123] !== 123456" ],
		[ "false & (set('myvar', 'foo')); myvar[1][2] === 'foo' | myvar[1][2] !== 'foo'" ],
		// Identifier before closing skipped brace, T214674#5374757
		[ "false & ( var := 'foo'; 'x' in var )" ],
		[ "false & ( var := 'foo'; added_lines irlike var )" ],
		[ "false & ( if ( 1 == 1 ) then (var := 3) else (var := 4) end;); var !== 'foo'" ],
		[ "if ( 1 === 1 ) then ( 0 ) else ( var := 1 ) end; var !== 'foo'" ],
		[ "if ( 1=== 1 ) then (0) else ( false & ( var := 1 ) ) end; var !== 'foo'" ],
	];
	return $cases;
}
/**
 * Tests for the AFPData::DUNDEFINED type. No exceptions should be thrown, and nothing should match
 *
 * @param string $code To be parsed
 * @dataProvider provideDUNDEFINED
 */
public function testDUNDEFINED( $code ) {
	foreach ( $this->getParsers() as $parser ) {
		$pname = get_class( $parser );
		try {
			$matched = $parser->parse( $code );
		} catch ( AFPException $e ) {
			// fail() interrupts the test, so $matched is always set below
			$this->fail( "Got exception with parser: $pname\n$e" );
		}
		$this->assertFalse( $matched, "Parser: $pname" );
	}
}
/**
 * Data provider for testDUNDEFINED. These bits of code must NOT match
 *
 * @return array
 */
public function provideDUNDEFINED() {
	$cases = [
		[
			"5 / length( new_wikitext ) !== 3 ** edit_delta & " .
			"float( timestamp / (user_age + 0.000001) ) !== 0.0"
		],
		[ "amount := float( timestamp / user_age); amount !== 0.0 & 64 / ( amount - 0.1 ) !== -640.0" ],
		[ "36 / ( length( user_rights ) + 0.00001 ) !== 0" ],
		[ "!('something' in added_lines)" ],
		[ "!(user_groups rlike 'foo')" ],
		[ "rcount('x', rescape(page_title) ) !== 0" ],
		[ "norm(user_name) !== rmspecials('')" ],
		[ "-user_editcount !== 1234567890" ],
		[ "added_lines" ],
		[ "removed_lines[0] !== 123456" ],
		[ "-new_size" ],
		[ "new_wikitext !== null" ],
		[ "true & user_editcount" ],
		[ "var:= 5; added_lines contains var" ],
		[ "false & (var := [ 1,2,3 ]); var === [ 1,2,3 ]" ],
		[ "page_age - user_editcount !== 1234567 - page_namespace" ],
		// Refuse to modify a DUNDEFINED offset as if it were an array
		[ "false & (var := [ 1,2,3 ]); var[0] := true; var[0] === true" ],
		[ "false & (var := [ 1,2,3 ]); var[] := 'baz'; 'baz' in var" ],
		// But allow overwriting the whole variable
		[ "false & (var := [ 1,2,3 ]); var := [4,5,6]; var !== [4,5,6]" ],
	];
	return $cases;
}
/**
 * Test that empty operands are correctly logged. Note that this test doesn't generate coverage
 * *intentionally*. This is so that if the logEmptyOperand method becomes covered, there's likely
 * a bug somewhere in the parser.
 * This test is only necessary for the "basic" parser
 *
 * A mock of AbuseFilterParser is used, expecting logEmptyOperand to be called
 * exactly once with the given operand type while parsing the code.
 *
 * @param string $code
 * @param string $operandType
 * @dataProvider provideEmptyOperands
 */
public function testEmptyOperands( $code, $operandType ) {
	$constructorArgs = [ new LanguageEn(), new EmptyBagOStuff(), new NullLogger() ];
	$mockBuilder = $this->getMockBuilder( AbuseFilterParser::class )
		->setConstructorArgs( $constructorArgs )
		->setMethods( [ 'logEmptyOperand' ] );
	/** @var PHPUnit\Framework\MockObject\MockObject|AbuseFilterParser $parserMock */
	$parserMock = $mockBuilder->getMock();

	$parserMock->expects( $this->once() )
		->method( 'logEmptyOperand' )
		->with( $operandType );

	$parserMock->toggleConditionLimit( false );
	$parserMock->parse( $code );
}
/**
 * Data provider for testEmptyOperands.
 * Each case is a piece of code followed by the operand type which
 * is expected to be passed to logEmptyOperand.
 *
 * @return array
 */
public function provideEmptyOperands() {
	$cases = [
		[ '(0 |)', 'bool operand' ],
		[ '(1 |)', 'bool operand' ],
		[ '(0 &)', 'bool operand' ],
		[ '(1 &)', 'bool operand' ],
		[ '1==', 'compare operand' ],
		[ '0<=', 'compare operand' ],
		[ '1+', 'sum operand' ],
		[ '0-', 'sum operand' ],
		[ '1*', 'multiplication operand' ],
		[ '1**', 'power operand' ],
		[ '"string" contains', 'keyword operand' ],
		[ '1 in', 'keyword operand' ],
		[ "contains_any('a','b','c',)", 'function argument' ],
		[ "equals_to_any('a','b',)", 'function argument' ],
		[ "(!)", 'bool inversion' ],
		// `(false &!)` and `(true &!)`, originally reported in T156096,
		// should be used in the future to test that they throw. However,
		// using them now would log twice and thus make the test fail.
		[ "var :=", 'var assignment' ],
		[ "var :=[];var[] :=", 'array assignment' ],
		[ "var :=[1];var[0] :=", 'array assignment' ],
		[ "false ? false :", 'ternary else' ],
		[ "true ? false :", 'ternary else' ],
		[ "-", 'unary operand' ],
		[ "+", 'unary operand' ],
		[ 'if () then (1) end', 'if condition' ],
		[ 'if () then (1) else (1) end', 'if condition' ],
		[ 'if (true) then () end', 'if body' ],
		[ 'if (false) then () end', 'if body' ],
		[ 'if (true) then () else (3) end', 'if body' ],
		[ 'if (false) then () else (3) end', 'if body' ],
		[ 'if (true) then (1) else () end', 'else body' ],
		[ 'if (false) then (1) else () end', 'else body' ],
	];
	return $cases;
}
}