mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-27 15:30:42 +00:00
Merge "Add a static analyzer for the filter language"
This commit is contained in:
commit
199cf1edf8
|
@ -458,6 +458,7 @@
|
|||
"abusefilter-exception-toomanyargs": "Too many arguments to function $2 called at character $1.\nExpected at most $3 {{PLURAL:$3|argument|arguments}}, got $4",
|
||||
"abusefilter-exception-regexfailure": "Error in regular expression \"$2\" at character $1.",
|
||||
"abusefilter-exception-overridebuiltin": "Illegal overriding of built-in identifier \"$2\" at character $1.",
|
||||
"abusefilter-exception-usebuiltin": "Illegal use of built-in identifier \"$2\" at character $1.",
|
||||
"abusefilter-exception-outofbounds": "Requesting non-existent array item $2 (array size = $3) at character $1.",
|
||||
"abusefilter-exception-negativeindex": "Negative indexes are not allowed in arrays. Got index \"$2\" at character $1.",
|
||||
"abusefilter-exception-notarray": "Requesting array item of non-array at character $1.",
|
||||
|
@ -465,6 +466,8 @@
|
|||
"abusefilter-exception-invalidiprange": "Invalid IP range \"$2\" provided at character $1.",
|
||||
"abusefilter-exception-disabledvar": "Variable $2 at character $1 is no longer in use.",
|
||||
"abusefilter-exception-variablevariable": "set and set_var expect the first argument to be a string literal, found at character $1.",
|
||||
"abusefilter-exception-unknownfunction": "$2 is not a valid function at character $1.",
|
||||
"abusefilter-exception-unusedvars": "The following variables are unused: $2.",
|
||||
"abusefilter-parser-warning-match-empty-regex": "This regular expression matches the empty string, at character $1",
|
||||
"abusefilter-action-tag": "Tag",
|
||||
"abusefilter-action-throttle": "Throttle",
|
||||
|
|
|
@ -498,6 +498,7 @@
|
|||
"abusefilter-exception-toomanyargs": "Error message from the abuse filter parser. Parameters:\n* $1 - position in the string (numeral)\n* $2 - a function name\n* $3 - the number of expected arguments\n* $4 - the number of passed arguments (also supports PLURAL)",
|
||||
"abusefilter-exception-regexfailure": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Regular expression",
|
||||
"abusefilter-exception-overridebuiltin": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Built-in identifier",
|
||||
"abusefilter-exception-usebuiltin": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Built-in identifier",
|
||||
"abusefilter-exception-outofbounds": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Index\n* $3 - Number of items in array",
|
||||
"abusefilter-exception-negativeindex": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Index",
|
||||
"abusefilter-exception-notarray": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string",
|
||||
|
@ -505,6 +506,8 @@
|
|||
"abusefilter-exception-invalidiprange": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - String provided as an argument to a function",
|
||||
"abusefilter-exception-disabledvar": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Name of the disabled variable",
|
||||
"abusefilter-exception-variablevariable": "{{doc-important|Do not translate \"'''set'''\" and \"'''set_var'''\".}} Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string",
|
||||
"abusefilter-exception-unknownfunction": "Error message from the abuse filter parser. Parameters:\n* $1 - Position in the string\n* $2 - Name of the unknown function",
|
||||
"abusefilter-exception-unusedvars": "Error message from the abuse filter parser. Parameters:\n* $1 - (unused)\n* $2 - Names of unused variables",
|
||||
"abusefilter-parser-warning-match-empty-regex": "Warning message from the abuse filter parser. Parameters:\n* $1 - Position in the string",
|
||||
"abusefilter-action-tag": "{{doc-abusefilter-action}}\n\nThe edit or change can be 'tagged' with a particular tag, which will be shown on Recent Changes, contributions, logs, new pages, history, and everywhere else. \n\nThis is a verb in the imperative form.\n\n{{Identical|Tag}}",
|
||||
"abusefilter-action-throttle": "{{doc-abusefilter-action}}",
|
||||
|
|
|
@ -23,7 +23,7 @@ class AFPSyntaxTree {
|
|||
/**
|
||||
* @return AFPTreeNode|null
|
||||
*/
|
||||
public function getRoot() {
|
||||
public function getRoot(): ?AFPTreeNode {
|
||||
return $this->rootNode;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,9 +92,10 @@ abstract class AFPTransitionBase {
|
|||
*
|
||||
* @param array $args The arguments supplied to the function
|
||||
* @param string $func The function name
|
||||
* @param int $position
|
||||
* @throws UserVisibleException
|
||||
*/
|
||||
protected function checkArgCount( $args, $func ) {
|
||||
protected function checkArgCount( $args, $func, int $position ) {
|
||||
if ( !array_key_exists( $func, self::FUNC_ARG_COUNT ) ) {
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new InvalidArgumentException( "$func is not a valid function." );
|
||||
|
@ -104,13 +105,13 @@ abstract class AFPTransitionBase {
|
|||
if ( count( $args ) < $min ) {
|
||||
throw new UserVisibleException(
|
||||
$min === 1 ? 'noparams' : 'notenoughargs',
|
||||
$this->mPos,
|
||||
$position,
|
||||
[ $func, $min, count( $args ) ]
|
||||
);
|
||||
} elseif ( count( $args ) > $max ) {
|
||||
throw new UserVisibleException(
|
||||
'toomanyargs',
|
||||
$this->mPos,
|
||||
$position,
|
||||
[ $func, $max, count( $args ) ]
|
||||
);
|
||||
}
|
||||
|
|
|
@ -81,6 +81,15 @@ class AFPTreeNode {
|
|||
// token corresponding to the literal.
|
||||
public const ATOM = 'ATOM';
|
||||
|
||||
// BINOP is a combination of LOGIC (^), COMPARE (<=, <, etc.),
|
||||
// SUM_REL (+, -), MUL_REL (*, /, %), POW (**),
|
||||
// KEYWORD_OPERATOR (like, rlike, etc.), and ARRAY_INDEX ([]).
|
||||
// The format is (operator, operand, operand).
|
||||
// Currently, it's only used in SyntaxChecker
|
||||
// & and |, which are in LOGIC, are not in BINOP because they affect
|
||||
// control flow.
|
||||
public const BINOP = 'BINOP';
|
||||
|
||||
/** @var string Type of the node, one of the constants above */
|
||||
public $type;
|
||||
/**
|
||||
|
@ -94,14 +103,6 @@ class AFPTreeNode {
|
|||
/** @var int Position used for error reporting. */
|
||||
public $position;
|
||||
|
||||
/**
|
||||
* @var string[] Names of the variables assigned in this node or any of its descendants
|
||||
* @todo We could change this to be an instance of a new AFPScope class (holding a var map)
|
||||
* if we'll have the need to store other scope-specific data,
|
||||
* see <https://phabricator.wikimedia.org/T230982#5475400>.
|
||||
*/
|
||||
private $innerAssignments = [];
|
||||
|
||||
/**
|
||||
* @param string $type
|
||||
* @param (AFPTreeNode|null)[]|string[]|AFPToken $children
|
||||
|
@ -111,56 +112,6 @@ class AFPTreeNode {
|
|||
$this->type = $type;
|
||||
$this->children = $children;
|
||||
$this->position = $position;
|
||||
$this->populateInnerAssignments();
|
||||
}
|
||||
|
||||
/**
|
||||
* Save in this node all the variable names used in the children, and in this node if it's an
|
||||
* assignment-related node. Note that this doesn't check whether the variable is custom or builtin:
|
||||
* this is already checked when calling setUserVariable.
|
||||
* In case we'll ever need to store other data in a node, or maybe even a Scope object, this could
|
||||
* be moved to a different class which could also re-visit the whole AST.
|
||||
*/
|
||||
private function populateInnerAssignments() {
|
||||
if ( $this->type === self::ATOM ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
$this->type === self::ASSIGNMENT ||
|
||||
$this->type === self::INDEX_ASSIGNMENT ||
|
||||
$this->type === self::ARRAY_APPEND
|
||||
) {
|
||||
$this->innerAssignments = [ $this->children[0] ];
|
||||
} elseif (
|
||||
$this->type === self::FUNCTION_CALL &&
|
||||
in_array( $this->children[0], [ 'set', 'set_var' ] ) &&
|
||||
// If unset, parsing will fail when checking arguments
|
||||
isset( $this->children[1] )
|
||||
) {
|
||||
$varnameNode = $this->children[1];
|
||||
if ( $varnameNode->type !== self::ATOM ) {
|
||||
// Shouldn't happen since variable variables are not allowed
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new InternalException( "Got non-atom type {$varnameNode->type} for set_var" );
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
$this->innerAssignments = [ $varnameNode->children->value ];
|
||||
}
|
||||
|
||||
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach ATOM excluded above
|
||||
foreach ( $this->children as $child ) {
|
||||
if ( $child instanceof self ) {
|
||||
$this->innerAssignments = array_merge( $this->innerAssignments, $child->getInnerAssignments() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getInnerAssignments(): array {
|
||||
return $this->innerAssignments;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -594,40 +594,14 @@ class AFPTreeParser extends AFPTransitionBase {
|
|||
* @throws UserVisibleException
|
||||
*/
|
||||
protected function doLevelFunction() {
|
||||
$next = $this->getNextToken();
|
||||
if ( $this->mCur->type === AFPToken::TID &&
|
||||
isset( AbuseFilterCachingParser::FUNCTIONS[$this->mCur->value] )
|
||||
$next->type === AFPToken::TBRACE &&
|
||||
$next->value === '('
|
||||
) {
|
||||
$func = $this->mCur->value;
|
||||
$position = $this->mPos;
|
||||
$this->move();
|
||||
if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== '(' ) {
|
||||
throw new UserVisibleException( 'expectednotfound',
|
||||
$this->mPos,
|
||||
[
|
||||
'(',
|
||||
$this->mCur->type,
|
||||
$this->mCur->value
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
if ( ( $func === 'set' || $func === 'set_var' ) ) {
|
||||
$state = $this->getState();
|
||||
$this->move();
|
||||
$next = $this->getNextToken();
|
||||
if (
|
||||
$this->mCur->type !== AFPToken::TSTRING ||
|
||||
(
|
||||
$next->type !== AFPToken::TCOMMA &&
|
||||
// Let this fail later, when checking parameters count
|
||||
!( $next->type === AFPToken::TBRACE && $next->value === ')' )
|
||||
)
|
||||
) {
|
||||
throw new UserVisibleException( 'variablevariable', $this->mPos, [] );
|
||||
} else {
|
||||
$this->setState( $state );
|
||||
}
|
||||
}
|
||||
|
||||
$args = [];
|
||||
$next = $this->getNextToken();
|
||||
|
@ -661,11 +635,6 @@ class AFPTreeParser extends AFPTransitionBase {
|
|||
]
|
||||
);
|
||||
}
|
||||
// Giving too few arguments to a function is a pretty common error. If we check it here
|
||||
// (as well as at runtime, for OCD), we can make checkSyntax only try to build the AST, as
|
||||
// there would be way less runtime errors. Moreover, this check will also be performed inside
|
||||
// skipped branches, e.g. the discarded if/else branch.
|
||||
$this->checkArgCount( $args, $func );
|
||||
$this->move();
|
||||
|
||||
array_unshift( $args, $func );
|
||||
|
|
|
@ -249,6 +249,7 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
self::CACHE_VERSION,
|
||||
AFPTreeParser::CACHE_VERSION,
|
||||
AbuseFilterTokenizer::CACHE_VERSION,
|
||||
SyntaxChecker::CACHE_VERSION,
|
||||
array_keys( self::FUNCTIONS ),
|
||||
array_keys( self::KEYWORDS ),
|
||||
];
|
||||
|
@ -338,7 +339,6 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
public function intEval( $code ): AFPData {
|
||||
$startTime = microtime( true );
|
||||
$tree = $this->getTree( $code );
|
||||
|
||||
$res = $this->evalTree( $tree );
|
||||
|
||||
if ( $res->getType() === AFPData::DUNDEFINED ) {
|
||||
|
@ -396,7 +396,15 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
$this->fromCache = false;
|
||||
$parser = new AFPTreeParser( $this->cache, $this->logger, $this->statsd, $this->keywordsManager );
|
||||
$parser->setFilter( $this->mFilter );
|
||||
return $parser->parse( $code );
|
||||
$tree = $parser->parse( $code );
|
||||
$checker = new SyntaxChecker(
|
||||
$tree,
|
||||
$this->keywordsManager,
|
||||
SyntaxChecker::MCONSERVATIVE,
|
||||
false
|
||||
);
|
||||
$checker->start();
|
||||
return $tree;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
@ -608,9 +616,6 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
case AFPTreeNode::INDEX_ASSIGNMENT:
|
||||
list( $varName, $offset, $value ) = $node->children;
|
||||
|
||||
if ( $this->isReservedIdentifier( $varName ) ) {
|
||||
throw new UserVisibleException( 'overridebuiltin', $node->position, [ $varName ] );
|
||||
}
|
||||
$array = $this->getVarValue( $varName );
|
||||
|
||||
if ( $array->getType() !== AFPData::DARRAY && $array->getType() !== AFPData::DUNDEFINED ) {
|
||||
|
@ -650,10 +655,6 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
case AFPTreeNode::ARRAY_APPEND:
|
||||
list( $varName, $value ) = $node->children;
|
||||
|
||||
if ( $this->isReservedIdentifier( $varName ) ) {
|
||||
throw new UserVisibleException( 'overridebuiltin', $node->position, [ $varName ] );
|
||||
}
|
||||
|
||||
$array = $this->getVarValue( $varName );
|
||||
$value = $this->evalNode( $value );
|
||||
if ( $array->getType() !== AFPData::DUNDEFINED ) {
|
||||
|
@ -708,7 +709,6 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
) {
|
||||
$result = $this->funcCache[$funcHash];
|
||||
} else {
|
||||
$this->checkArgCount( $args, $fname );
|
||||
$this->raiseCondCount();
|
||||
|
||||
// Any undefined argument should be special-cased by the function, but that would be too
|
||||
|
@ -798,23 +798,13 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
if ( array_key_exists( $var, $deprecatedVars ) ) {
|
||||
$var = $deprecatedVars[ $var ];
|
||||
}
|
||||
if ( $this->keywordsManager->isVarDisabled( $var ) ) {
|
||||
throw new UserVisibleException(
|
||||
'disabledvar',
|
||||
$this->mCur->pos,
|
||||
[ $var ]
|
||||
);
|
||||
}
|
||||
if ( !$this->varExists( $var ) ) {
|
||||
throw new UserVisibleException(
|
||||
'unrecognisedvar',
|
||||
$this->mCur->pos,
|
||||
[ $var ]
|
||||
);
|
||||
}
|
||||
// With check syntax, all unbound variables will be caught
|
||||
// already. So we do not error unbound variables at runtime,
|
||||
// allowing it to result in DUNDEFINED.
|
||||
$allowMissingVariables = !$this->varExists( $var ) || $this->allowMissingVariables;
|
||||
|
||||
// It's a built-in, non-disabled variable (either set or unset), or a set custom variable
|
||||
$flags = $this->allowMissingVariables
|
||||
$flags = $allowMissingVariables
|
||||
? VariablesManager::GET_LAX
|
||||
// TODO: This should be GET_STRICT, but that's going to be very hard (see T230256)
|
||||
: VariablesManager::GET_BC;
|
||||
|
@ -827,9 +817,6 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
* @throws UserVisibleException
|
||||
*/
|
||||
protected function setUserVariable( $name, $value ) {
|
||||
if ( $this->isReservedIdentifier( $name ) ) {
|
||||
throw new UserVisibleException( 'overridebuiltin', $this->mCur->pos, [ $name ] );
|
||||
}
|
||||
$this->mVariables->setVar( $name, $value );
|
||||
}
|
||||
|
||||
|
@ -1454,48 +1441,18 @@ class AbuseFilterCachingParser extends AFPTransitionBase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Given a node that we don't need to evaluate, decide what to do with it. The nodes passed in
|
||||
* will usually be discarded by short-circuit evaluation. If we allow it, then we just hoist
|
||||
* the variables assigned in any descendant of the node. Otherwise, we fully evaluate the node.
|
||||
* Given a node that we don't need to evaluate, decide what to do with it.
|
||||
* The nodes passed in will usually be discarded by short-circuit
|
||||
* evaluation. If we don't allow it, we fully evaluate the node.
|
||||
*
|
||||
* @param AFPTreeNode $node
|
||||
*/
|
||||
private function maybeDiscardNode( AFPTreeNode $node ) {
|
||||
if ( $this->mAllowShort ) {
|
||||
$this->discardWithHoisting( $node );
|
||||
} else {
|
||||
if ( !$this->mAllowShort ) {
|
||||
$this->evalNode( $node );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Intended to be used for short-circuit as a solution for T214674.
|
||||
* Given a node, check it and its children; if there are assignments of non-existing variables,
|
||||
* hoist them. In case of index assignment or array append, the old value is always erased and
|
||||
* overwritten with a DUNDEFINED. This is used to allow stuff like:
|
||||
* false & ( var := 'foo' ); var == 2
|
||||
* or
|
||||
* if ( false ) then ( var := 'foo' ) else ( 1 ) end; var == 2
|
||||
* where `false` is something evaluated as false at runtime.
|
||||
*
|
||||
* @note This method doesn't check whether the variable exists in case of index assignments.
|
||||
* Hence, in `false & (nonexistent[] := 2)`, `nonexistent` would be hoisted without errors.
|
||||
* However, that would be caught by checkSyntax, so we can avoid checking here: we'd need
|
||||
* way more context than we currently have.
|
||||
*
|
||||
* @param AFPTreeNode $node
|
||||
*/
|
||||
private function discardWithHoisting( AFPTreeNode $node ) {
|
||||
foreach ( $node->getInnerAssignments() as $name ) {
|
||||
if (
|
||||
!$this->mVariables->varIsSet( $name ) ||
|
||||
$this->varManager->getVar( $this->mVariables, $name )->getType() === AFPData::DARRAY
|
||||
) {
|
||||
$this->setUserVariable( $name, new AFPData( AFPData::DUNDEFINED ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a regexp in the AF syntax, make it PCRE-compliant (i.e. we need to escape slashes, add
|
||||
* delimiters and modifiers).
|
||||
|
|
|
@ -60,7 +60,8 @@ class UserVisibleException extends ExceptionBase {
|
|||
// abusefilter-exception-notarray, abusefilter-exception-unclosedcomment
|
||||
// abusefilter-exception-invalidiprange, abusefilter-exception-disabledvar
|
||||
// abusefilter-exception-variablevariable, abusefilter-exception-toomanyargs
|
||||
// abusefilter-exception-negativeoffset
|
||||
// abusefilter-exception-negativeoffset, abusefilter-exception-unusedvars
|
||||
// abusefilter-exception-unknownfunction, abusefilter-exception-usebuiltin
|
||||
return new Message(
|
||||
'abusefilter-exception-' . $this->mExceptionID,
|
||||
array_merge( [ $this->mPosition ], $this->mParams )
|
||||
|
|
635
includes/Parser/SyntaxChecker.php
Normal file
635
includes/Parser/SyntaxChecker.php
Normal file
|
@ -0,0 +1,635 @@
|
|||
<?php
|
||||
|
||||
namespace MediaWiki\Extension\AbuseFilter\Parser;
|
||||
|
||||
use LogicException;
|
||||
use MediaWiki\Extension\AbuseFilter\KeywordsManager;
|
||||
use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException;
|
||||
use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException;
|
||||
use Message;
|
||||
|
||||
/**
|
||||
* SyntaxChecker statically analyzes the code without actually running it.
|
||||
* Currently, it only checks for
|
||||
*
|
||||
* - unbound variables
|
||||
* - unused variables: note that a := 1; a := 1; a
|
||||
* is considered OK even though the first `a` seems unused
|
||||
* because the pattern "a := null; if ... then (a := ...) end; ..."
|
||||
* should not count first `a` as unused.
|
||||
* - assignment to built-in identifiers
|
||||
* - invalid function call (arity mismatch, non-valid function)
|
||||
* - first-order information of `set_var` and `set`
|
||||
*
|
||||
* Because it doesn't cover all checks that the current Check Syntax does,
|
||||
* it is currently complementary to the current Check Syntax.
|
||||
* In the future, it could subsume the current Check Syntax, and could be
|
||||
* extended to perform type checking or type inference.
|
||||
*/
|
||||
class SyntaxChecker extends AFPTransitionBase {
|
||||
/**
|
||||
* @var AFPTreeNode|null Root of the AST to check
|
||||
*/
|
||||
private $treeRoot;
|
||||
|
||||
/** @var KeywordsManager */
|
||||
protected $keywordsManager;
|
||||
|
||||
public const MCONSERVATIVE = 'MODE_CONSERVATIVE';
|
||||
public const MLIBERAL = 'MODE_LIBERAL';
|
||||
public const DUMMYPOS = 0;
|
||||
public const CACHE_VERSION = 1;
|
||||
|
||||
/**
|
||||
* @var string The mode of checking. The value should be either
|
||||
*
|
||||
* - MLIBERAL: which guarantees that all user-defined variables
|
||||
* will be bound, but incompatible with what the evaluator currently
|
||||
* permits. E.g.,
|
||||
*
|
||||
* if true then (a := 1) else null end; a
|
||||
*
|
||||
* is rejected in this mode, even though `a` is in fact always bound.
|
||||
*
|
||||
* - MCONSERVATIVE which is compatible with what the evaluator
|
||||
* currently permits, but could allow undefined variables to occur.
|
||||
* E.g.,
|
||||
*
|
||||
* if false then (a := 1) else null end; a
|
||||
*
|
||||
* is accepted in this mode, even though `a` is in fact always unbound.
|
||||
*/
|
||||
private $mode;
|
||||
|
||||
/**
|
||||
* @var bool Whether we want to check for unused variables
|
||||
*/
|
||||
private $checkUnusedVars;
|
||||
|
||||
/**
 * Prepare a checker for the given syntax tree.
 *
 * @param AFPSyntaxTree $tree Tree whose root node will be analyzed
 * @param KeywordsManager $keywordsManager
 * @param string $mode Either self::MCONSERVATIVE or self::MLIBERAL
 * @param bool $checkUnusedVars Whether start() should also report unused variables
 */
public function __construct(
	AFPSyntaxTree $tree,
	KeywordsManager $keywordsManager,
	string $mode = self::MCONSERVATIVE,
	bool $checkUnusedVars = false
) {
	// Plain configuration first, then the (possibly null) root of the tree
	$this->checkUnusedVars = $checkUnusedVars;
	$this->mode = $mode;
	$this->keywordsManager = $keywordsManager;
	$this->treeRoot = $tree->getRoot();
}
|
||||
|
||||
/**
 * Run the static analysis on the tree given to the constructor.
 *
 * Nothing happens when the tree has no root. Otherwise the root is desugared
 * and checked; if unused-variable reporting is enabled and at least one bound
 * variable was never used, an 'unusedvars' exception is thrown.
 *
 * @throws UserVisibleException
 */
public function start(): void {
	if ( $this->treeRoot === null ) {
		return;
	}

	// Always run the check: it throws on its own for the other kinds of error
	$usageMap = $this->check( $this->desugar( $this->treeRoot ), [] );
	if ( !$this->checkUnusedVars ) {
		return;
	}

	// Collect the names whose "used" flag is still falsy
	$unusedNames = [];
	foreach ( $usageMap as $varName => $isUsed ) {
		if ( !$isUsed ) {
			$unusedNames[] = $varName;
		}
	}

	if ( $unusedNames ) {
		throw new UserVisibleException(
			'unusedvars',
			self::DUMMYPOS,
			[ Message::listParam( $unusedNames, 'comma' ) ]
		);
	}
}
|
||||
|
||||
/**
 * Remove syntactic sugar so that we don't need to deal with
 * too many cases.
 *
 * This could benefit the evaluator as well, but for now, this is
 * only used for static analysis. For that reason the tree passed in is
 * never modified: every rewritten node is a new AFPTreeNode. ATOM nodes
 * are returned as-is and are therefore shared with the original tree.
 *
 * Postcondition:
 * - The tree will not contain nodes of
 *   type ASSIGNMENT, LOGIC, COMPARE, SUM_REL, MUL_REL, POW,
 *   KEYWORD_OPERATOR, and ARRAY_INDEX
 * - The tree may additionally contain a node of type BINOP.
 * - The tree should not have set_var function application.
 * - Conditionals will have both branches.
 *
 * @param AFPTreeNode $node
 * @return AFPTreeNode
 * @throws InternalException If the node has an unknown type
 */
private function desugar( AFPTreeNode $node ): AFPTreeNode {
	switch ( $node->type ) {
		case AFPTreeNode::ATOM:
			return $node;

		case AFPTreeNode::FUNCTION_CALL:
			$call = $this->newNodeMapExceptFirst( $node );
			if ( $call->children[0] === 'set_var' ) {
				// Normalize the alias on the copy, leaving the caller's node untouched
				$call->children[0] = 'set';
			}
			return $call;

		case AFPTreeNode::ARRAY_INDEX:
			return $this->newNodeNamedBinop( $node, '[]' );

		case AFPTreeNode::POW:
			return $this->newNodeNamedBinop( $node, '**' );

		case AFPTreeNode::UNARY:
			// The first child is the operator, not a node
			return $this->newNodeMapExceptFirst( $node );

		case AFPTreeNode::BOOL_INVERT:
			/*
			 * @todo this should really be combined with UNARY,
			 * but let's wait to change the meaning of UNARY across
			 * the codebase together
			 */
			return $this->newNodeMapAll( $node );

		case AFPTreeNode::KEYWORD_OPERATOR:
		case AFPTreeNode::MUL_REL:
		case AFPTreeNode::SUM_REL:
		case AFPTreeNode::COMPARE:
			return $this->newNodeBinop( $node );

		case AFPTreeNode::LOGIC:
			$result = $this->newNodeBinop( $node );
			list( $op, $left, $right ) = $result->children;
			if ( $op === '&' || $op === '|' ) {
				// These affect control flow, so they become conditionals rather than BINOPs
				return $this->desugarAndOr( $op, $left, $right, $node->position );
			} else {
				return $result;
			}

		case AFPTreeNode::ARRAY_DEFINITION:
		case AFPTreeNode::SEMICOLON:
			return $this->newNodeMapAll( $node );

		case AFPTreeNode::CONDITIONAL:
			// Work on a copy of the children so the original node keeps its missing branch
			$branches = $node->children;
			if ( $branches[2] === null ) {
				$branches[2] = new AFPTreeNode(
					AFPTreeNode::ATOM,
					new AFPToken(
						AFPToken::TKEYWORD,
						"null",
						$node->position
					),
					$node->position
				);
			}
			return $this->newNodeMapAll( $this->newNode( $node, $branches ) );

		case AFPTreeNode::ASSIGNMENT:
			list( $varname, $value ) = $node->children;

			// `name := value` becomes `set( 'name', value )`
			return new AFPTreeNode(
				AFPTreeNode::FUNCTION_CALL,
				[
					"set",
					new AFPTreeNode(
						AFPTreeNode::ATOM,
						new AFPToken(
							AFPToken::TSTRING,
							$varname,
							$node->position
						),
						$node->position
					),
					$this->desugar( $value )
				],
				$node->position
			);

		case AFPTreeNode::INDEX_ASSIGNMENT:
		case AFPTreeNode::ARRAY_APPEND:
			// The first child is the variable name, not a node
			return $this->newNodeMapExceptFirst( $node );

		default:
			// @codeCoverageIgnoreStart
			throw new InternalException( "Unknown node type passed: {$node->type}" );
			// @codeCoverageIgnoreEnd
	}
}
|
||||
|
||||
/**
 * Rewrite a `&` or `|` operation as nested CONDITIONAL nodes.
 *
 * @param string $op Either '&' or '|'
 * @param AFPTreeNode $left Left operand
 * @param AFPTreeNode $right Right operand
 * @param int $position Position given to every node created here
 * @return AFPTreeNode
 * @throws InternalException If $op is neither '&' nor '|'
 */
private function desugarAndOr(
	string $op,
	AFPTreeNode $left,
	AFPTreeNode $right,
	int $position
): AFPTreeNode {
	// Builds an ATOM node holding the given keyword at $position
	$keywordAtom = static function ( string $keyword ) use ( $position ): AFPTreeNode {
		return new AFPTreeNode(
			AFPTreeNode::ATOM,
			new AFPToken( AFPToken::TKEYWORD, $keyword, $position ),
			$position
		);
	};

	$trueNode = $keywordAtom( "true" );
	$falseNode = $keywordAtom( "false" );
	// if <b> then true else false end
	$conditionalNode = new AFPTreeNode(
		AFPTreeNode::CONDITIONAL,
		[ $right, $trueNode, $falseNode ],
		$position
	);

	if ( $op === '&' ) {
		// <a> & <b> is supposed to be equivalent to
		// if <a> then (if <b> then true else false) else false end
		// See T237336 for why this is currently not the case.
		$branches = [ $left, $conditionalNode, $falseNode ];
	} elseif ( $op === '|' ) {
		// <a> | <b> is supposed to be equivalent to
		// if <a> then true else (if <b> then true else false) end
		// See T237336 for why this is currently not the case.
		$branches = [ $left, $trueNode, $conditionalNode ];
	} else {
		// @codeCoverageIgnoreStart
		throw new InternalException( "Unknown operator: {$op}" );
		// @codeCoverageIgnoreEnd
	}

	return new AFPTreeNode( AFPTreeNode::CONDITIONAL, $branches, $position );
}
|
||||
|
||||
/**
 * Build a node that keeps the type and position of an existing node
 * but holds the given children instead.
 *
 * @param AFPTreeNode $node Node to take type and position from
 * @param AFPTreeNode[]|string[]|AFPToken $children Children of the new node
 * @return AFPTreeNode
 */
private function newNode( AFPTreeNode $node, $children ): AFPTreeNode {
	$type = $node->type;
	$position = $node->position;

	return new AFPTreeNode( $type, $children, $position );
}
|
||||
|
||||
/**
 * Build a node that keeps the children and position of an existing node
 * but has the given type instead.
 *
 * @param AFPTreeNode $node Node to take children and position from
 * @param string $type Type of the new node
 * @return AFPTreeNode
 */
private function newNodeReplaceType(
	AFPTreeNode $node,
	string $type
): AFPTreeNode {
	$children = $node->children;
	$position = $node->position;

	return new AFPTreeNode( $type, $children, $position );
}
|
||||
|
||||
/**
 * Build a copy of the node in which every child has been desugared.
 *
 * @param AFPTreeNode $node Node whose children are an array of nodes
 * @return AFPTreeNode
 * @throws LogicException If the children of the node are not an array
 */
private function newNodeMapAll( AFPTreeNode $node ): AFPTreeNode {
	$original = $node->children;

	if ( is_array( $original ) ) {
		$desugared = [];
		foreach ( $original as $key => $child ) {
			$desugared[$key] = $this->desugar( $child );
		}
		return $this->newNode( $node, $desugared );
	}

	// @codeCoverageIgnoreStart
	throw new LogicException(
		"Unexpected non-array children of an AFPTreeNode of type " .
		"{$node->type} at position {$node->position}"
	);
	// @codeCoverageIgnoreEnd
}
|
||||
|
||||
/**
 * Build a copy of the node in which the first child is kept verbatim
 * and every following child has been desugared.
 *
 * @param AFPTreeNode $node
 * @return AFPTreeNode
 */
private function newNodeMapExceptFirst( AFPTreeNode $node ): AFPTreeNode {
	$head = $node->children[0];
	$tail = array_map(
		[ $this, 'desugar' ],
		array_values( array_slice( $node->children, 1 ) )
	);

	return $this->newNode( $node, array_merge( [ $head ], $tail ) );
}
|
||||
|
||||
/**
 * Turn a node whose first child is already the operator into a BINOP,
 * desugaring its remaining children.
 *
 * @param AFPTreeNode $node
 * @return AFPTreeNode
 */
private function newNodeBinop( AFPTreeNode $node ): AFPTreeNode {
	$desugared = $this->newNodeMapExceptFirst( $node );

	return $this->newNodeReplaceType( $desugared, AFPTreeNode::BINOP );
}
|
||||
|
||||
/**
 * Turn a node that carries no operator of its own into a BINOP whose
 * first child is the given operator, followed by the desugared children.
 *
 * @param AFPTreeNode $node
 * @param string $op Operator to store as the first child
 * @return AFPTreeNode
 */
private function newNodeNamedBinop(
	AFPTreeNode $node,
	string $op
): AFPTreeNode {
	$operands = $this->newNodeMapAll( $node )->children;
	$withOperator = $this->newNode( $node, array_merge( [ $op ], $operands ) );

	return $this->newNodeReplaceType( $withOperator, AFPTreeNode::BINOP );
}
|
||||
|
||||
/**
|
||||
* - Statically compute which variables are bound after evaluating $node,
|
||||
* provided that variables in $bound are already bound.
|
||||
* - Similarly compute for each bound variable after evaluating $node
|
||||
* whether it is used provided that we already have $bound
|
||||
* that contains necessary information.
|
||||
* - Ensure function application's validity.
|
||||
* - Ensure that the first argument of set is a literal string.
|
||||
* - Ensure that no assignment targets a built-in identifier.
|
||||
*
|
||||
* Precondition:
|
||||
* - The tree $node should be desugared and normalized.
|
||||
*
|
||||
* Postcondition:
|
||||
* - $node is guaranteed to have no unbound variables
|
||||
* provided that variables in $bound are already bound
|
||||
* (for the definition of unbound variable indicated by $this->mode)
|
||||
* - All function applications should be valid and have correct arity.
|
||||
* - The set function application's first argument should be
|
||||
* a literal string.
|
||||
*
|
||||
* @param AFPTreeNode $node
|
||||
* @param bool[] $bound Map of [ variable_name => used ]
|
||||
* @return bool[] Map of [ variable_name => used ]
|
||||
* @throws UserVisibleException
|
||||
* @throws InternalException
|
||||
*/
|
||||
private function check( AFPTreeNode $node, array $bound ): array {
|
||||
switch ( $node->type ) {
|
||||
// phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment
|
||||
case AFPTreeNode::ATOM:
|
||||
$tok = $node->children;
|
||||
switch ( $tok->type ) {
|
||||
case AFPToken::TID:
|
||||
return $this->lookupVar(
|
||||
$tok->value,
|
||||
$tok->pos,
|
||||
$bound
|
||||
);
|
||||
|
||||
case AFPToken::TSTRING:
|
||||
case AFPToken::TFLOAT:
|
||||
case AFPToken::TINT:
|
||||
case AFPToken::TKEYWORD:
|
||||
return $bound;
|
||||
|
||||
default:
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new InternalException( "Unknown token {$tok->type} provided in the ATOM node" );
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
case AFPTreeNode::ARRAY_DEFINITION:
|
||||
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
|
||||
foreach ( $node->children as $el ) {
|
||||
$bound = $this->check( $el, $bound );
|
||||
}
|
||||
return $bound;
|
||||
|
||||
case AFPTreeNode::FUNCTION_CALL:
|
||||
$fname = $node->children[0];
|
||||
$args = array_slice( $node->children, 1 );
|
||||
if ( !array_key_exists( $fname, AbuseFilterCachingParser::FUNCTIONS ) ) {
|
||||
throw new UserVisibleException(
|
||||
'unknownfunction',
|
||||
$node->position,
|
||||
[ $fname ]
|
||||
);
|
||||
}
|
||||
$this->checkArgCount( $args, $fname, $node->position );
|
||||
|
||||
if ( $fname === 'set' ) {
|
||||
// arity is checked, so we know $args[0] and $args[1] exist
|
||||
$tok = $args[0]->children;
|
||||
|
||||
if (
|
||||
!( $tok instanceof AFPToken ) ||
|
||||
$tok->type !== AFPToken::TSTRING
|
||||
) {
|
||||
throw new UserVisibleException(
|
||||
'variablevariable',
|
||||
$node->position,
|
||||
[]
|
||||
);
|
||||
}
|
||||
|
||||
$bound = $this->check( $args[1], $bound );
|
||||
// set the variable as unused
|
||||
return $this->assignVar(
|
||||
$tok->value,
|
||||
$tok->pos,
|
||||
$bound
|
||||
);
|
||||
} else {
|
||||
foreach ( $args as $arg ) {
|
||||
$bound = $this->check( $arg, $bound );
|
||||
}
|
||||
return $bound;
|
||||
}
|
||||
|
||||
case AFPTreeNode::BINOP:
|
||||
list( , $left, $right ) = $node->children;
|
||||
return $this->check( $right, $this->check( $left, $bound ) );
|
||||
|
||||
case AFPTreeNode::UNARY:
|
||||
list( , $argument ) = $node->children;
|
||||
return $this->check( $argument, $bound );
|
||||
|
||||
case AFPTreeNode::BOOL_INVERT:
|
||||
list( $argument ) = $node->children;
|
||||
return $this->check( $argument, $bound );
|
||||
// phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment
|
||||
case AFPTreeNode::CONDITIONAL:
|
||||
list( $condition, $exprIfTrue, $exprIfFalse ) = $node->children;
|
||||
$bound = $this->check( $condition, $bound );
|
||||
$boundLeft = $this->check( $exprIfTrue, $bound );
|
||||
$boundRight = $this->check( $exprIfFalse, $bound );
|
||||
switch ( $this->mode ) {
|
||||
case self::MCONSERVATIVE:
|
||||
return $this->mapUnion( $boundLeft, $boundRight );
|
||||
case self::MLIBERAL:
|
||||
return $this->mapIntersect( $boundLeft, $boundRight );
|
||||
default:
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new LogicException( "Unknown mode: {$this->mode}" );
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
|
||||
case AFPTreeNode::INDEX_ASSIGNMENT:
|
||||
list( $varName, $offset, $value ) = $node->children;
|
||||
|
||||
// deal with unbound $varName
|
||||
$bound = $this->lookupVar( $varName, $node->position, $bound );
|
||||
$bound = $this->check( $offset, $bound );
|
||||
$bound = $this->check( $value, $bound );
|
||||
// deal with built-in $varName and set $varName as unused
|
||||
return $this->assignVar( $varName, $node->position, $bound );
|
||||
|
||||
case AFPTreeNode::ARRAY_APPEND:
|
||||
list( $varName, $value ) = $node->children;
|
||||
|
||||
// deal with unbound $varName
|
||||
$bound = $this->lookupVar( $varName, $node->position, $bound );
|
||||
$bound = $this->check( $value, $bound );
|
||||
// deal with built-in $varName and set $varName as unused
|
||||
return $this->assignVar( $varName, $node->position, $bound );
|
||||
|
||||
case AFPTreeNode::SEMICOLON:
|
||||
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
|
||||
foreach ( $node->children as $statement ) {
|
||||
$bound = $this->check( $statement, $bound );
|
||||
}
|
||||
return $bound;
|
||||
|
||||
default:
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new LogicException( "Unknown type: {$node->type}" );
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Merge two maps, keeping every key that appears in at least one of them.
 *
 * For a key present in both maps the result is the logical OR of the two values;
 * a key present in only one map keeps its value unchanged.
 *
 * @param array $left
 * @param array $right
 * @return array
 */
private function mapUnion( array $left, array $right ): array {
	$merged = $left;
	foreach ( $right as $name => $flag ) {
		$merged[ $name ] = array_key_exists( $name, $merged )
			? ( $merged[ $name ] || $flag )
			: $flag;
	}
	return $merged;
}
|
||||
|
||||
/**
 * Merge two maps, keeping only the keys that appear in both of them.
 *
 * Each kept key is mapped to the logical OR of its two values. Keys follow
 * the order in which they appear in $left.
 *
 * @param array $left
 * @param array $right
 * @return array
 */
private function mapIntersect( array $left, array $right ): array {
	$common = [];
	foreach ( $left as $name => $flag ) {
		if ( array_key_exists( $name, $right ) ) {
			$common[ $name ] = $flag || $right[ $name ];
		}
	}
	return $common;
}
|
||||
|
||||
/**
 * Record an assignment to a variable: the lower-cased name is stored in the map
 * and flagged as not used.
 *
 * @param string $var Variable name, lower-cased before use
 * @param int $pos Position reported in the exception
 * @param array $bound Map of [ variable_name => used ]
 * @return array The updated map
 * @throws UserVisibleException 'overridebuiltin' if the name is a reserved identifier
 */
private function assignVar( string $var, int $pos, array $bound ): array {
	$name = strtolower( $var );
	if ( !$this->isReservedIdentifier( $name ) ) {
		$bound[ $name ] = false;
		return $bound;
	}
	throw new UserVisibleException( 'overridebuiltin', $pos, [ $name ] );
}
|
||||
|
||||
/**
 * Record a read of a variable. A name found in $bound is flagged as used;
 * a non-disabled built-in variable leaves the map untouched.
 *
 * @param string $var Variable name, lower-cased before use
 * @param int $pos Position reported in the exception
 * @param array $bound Map of [ variable_name => used ]
 * @return array The updated map
 * @throws UserVisibleException 'disabledvar', 'usebuiltin' or 'unrecognisedvar'
 */
private function lookupVar( string $var, int $pos, array $bound ): array {
	$name = strtolower( $var );

	if ( array_key_exists( $name, $bound ) ) {
		// user-defined variable
		$bound[ $name ] = true;
		return $bound;
	}

	if ( $this->keywordsManager->isVarDisabled( $name ) ) {
		// disabled built-in variables
		throw new UserVisibleException( 'disabledvar', $pos, [ $name ] );
	}

	if ( $this->keywordsManager->varExists( $name ) ) {
		// non-disabled built-in variables
		return $bound;
	}

	// other built-in identifiers are reported differently from unbound variables
	$exceptionId = $this->isReservedIdentifier( $name ) ? 'usebuiltin' : 'unrecognisedvar';
	throw new UserVisibleException( $exceptionId, $pos, [ $name ] );
}
|
||||
}
|
|
@ -48,6 +48,7 @@ use MediaWikiIntegrationTestCase;
|
|||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\AbuseFilterTokenizer
|
||||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPToken
|
||||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPData
|
||||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\SyntaxChecker
|
||||
*/
|
||||
class ParserEquivsetTest extends MediaWikiIntegrationTestCase {
|
||||
/**
|
||||
|
|
|
@ -53,6 +53,7 @@ use Wikimedia\TestingAccessWrapper;
|
|||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException
|
||||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\Exception\ExceptionBase
|
||||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPData
|
||||
* @covers \MediaWiki\Extension\AbuseFilter\Parser\SyntaxChecker
|
||||
*/
|
||||
class ParserTest extends ParserTestCase {
|
||||
/**
|
||||
|
@ -226,12 +227,12 @@ class ParserTest extends ParserTestCase {
|
|||
[ "if 1 = 1 then 'foo' else 'bar'", 'doLevelConditions' ],
|
||||
[ "a := 1 = 1 ? 'foo'", 'doLevelConditions' ],
|
||||
[ '(1 = 1', 'doLevelBraces' ],
|
||||
[ 'lcase = 3', 'doLevelFunction' ],
|
||||
[ 'lcase( 3 = 1', 'doLevelFunction' ],
|
||||
[ 'a := [1,2', 'doLevelAtom' ],
|
||||
[ '1 = 1 | (1', 'skipOverBraces/doLevelParenthesis' ],
|
||||
[ 'a := [1,2,3]; 3 = a[5', 'doLevelArrayElements' ],
|
||||
[ 'if[3] := 1', 'doLevelConditions' ],
|
||||
[ "set( page_title + 'x' + ( page_namespace == 0 ? 'x' : 'y' )", '' ]
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -471,7 +472,7 @@ class ParserTest extends ParserTestCase {
|
|||
[ "set( 'x' + 'y', 1 )", 'doLevelFunction' ],
|
||||
[ "set( 'x' + page_title, 1 )", 'doLevelFunction' ],
|
||||
[ "set( page_title, 1 )", 'doLevelFunction' ],
|
||||
[ "set( page_title + 'x' + ( page_namespace == 0 ? 'x' : 'y' )", 'doLevelFunction' ],
|
||||
[ "set( page_title + 'x' + ( page_namespace == 0 ? 'x' : 'y' ), 1 )", 'doLevelFunction' ],
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -504,10 +505,41 @@ class ParserTest extends ParserTestCase {
|
|||
[ 'set("added_lines", 45)', 'setUserVariable' ],
|
||||
[ 'set("length", 45)', 'setUserVariable' ],
|
||||
[ 'set("true", true)', 'setUserVariable' ],
|
||||
[ 'contains_any[1] := "foo"', '??' ],
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Check that the given expression raises the 'usebuiltin' exception, through both
 * exceptionTest() and exceptionTestInSkippedBlock().
 *
 * @param string $expr The expression to test
 * @dataProvider useBuiltin
 */
public function testUseBuiltinException( $expr ) {
	$expectedId = 'usebuiltin';
	$this->exceptionTest( $expectedId, $expr );
	$this->exceptionTestInSkippedBlock( $expectedId, $expr );
}
|
||||
|
||||
/**
 * Data provider for testUseBuiltinException: one expression per case.
 *
 * @return array
 */
public function useBuiltin() {
	$expressions = [
		'contains_any[1] := "foo"',
		'1 + lcase + 2',
	];
	$cases = [];
	foreach ( $expressions as $expression ) {
		$cases[] = [ $expression ];
	}
	return $cases;
}
|
||||
|
||||
/**
 * Check that a filter consisting only of a call to an unknown function raises
 * the 'unknownfunction' exception.
 */
public function testUnknownFunction() {
	$expectedId = 'unknownfunction';
	$code = 'f(1)';
	$this->exceptionTest( $expectedId, $code );
	$this->exceptionTestInSkippedBlock( $expectedId, $code );
}
|
||||
|
||||
/**
|
||||
* @param string $expr The expression to test
|
||||
* @param string $caller The function where the exception is thrown
|
||||
|
@ -840,6 +872,28 @@ class ParserTest extends ParserTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Test for mutating arrays under a skipped branch: the code must parse to true.
 *
 * @param string $code Code to parse
 * @dataProvider provideArrayAssignmentShortCircuit
 */
public function testArrayAssignmentShortCircuit( string $code ) {
	$parser = $this->getParser();
	$result = $parser->parse( $code );
	$this->assertTrue( $result );
}
|
||||
|
||||
/**
 * Data provider for testArrayAssignmentShortCircuit: one code snippet per case.
 *
 * @return array
 */
public function provideArrayAssignmentShortCircuit() {
	$snippets = [
		'a := [true]; false & (a[] := 2); a[0]',
		'a := [true]; false & (a[1] := 2); a[0]',
	];
	return array_map(
		static function ( string $snippet ): array {
			return [ $snippet ];
		},
		$snippets
	);
}
|
||||
|
||||
/**
|
||||
* Test that code declaring a variable in a skipped brace (because of shortcircuit)
|
||||
* will be parsed without throwing an exception when later trying to use that var. T214674
|
||||
|
@ -858,8 +912,6 @@ class ParserTest extends ParserTestCase {
|
|||
public function provideVarDeclarationInSkippedBlock() {
|
||||
return [
|
||||
[ "x := [5]; false & (1 == 1; y := 'b'; x[1] := 'x'; 3 < 4); y != 'b' & x[1] != 'x'" ],
|
||||
[ "(var := [1]); false & ( var[] := 'baz' ); count(var) > -1" ],
|
||||
[ "(var := [1]); false & ( var[1] := 'baz' ); var[1] === 'baz'" ],
|
||||
[ "false & (set('myvar', 1)); myvar contains 1" ],
|
||||
[ "false & ( ( false & ( var := [1] ) ) | ( var[] := 2 ) ); var" ],
|
||||
[ "false & ( ( false & ( var := [1] ); true ) | ( var[] := 2 ) ); var" ],
|
||||
|
|
|
@ -128,16 +128,12 @@ abstract class ParserTestCase extends MediaWikiUnitTestCase {
|
|||
* @return Language|MockObject
|
||||
*/
|
||||
protected function getLanguageMock() {
|
||||
$lang = $this->getMockBuilder( LanguageEn::class )
|
||||
->disableOriginalConstructor()
|
||||
->getMock();
|
||||
$lang->expects( $this->any() )
|
||||
->method( 'uc' )
|
||||
$lang = $this->createMock( LanguageEn::class );
|
||||
$lang->method( 'uc' )
|
||||
->willReturnCallback( static function ( $x ) {
|
||||
return mb_strtoupper( $x );
|
||||
} );
|
||||
$lang->expects( $this->any() )
|
||||
->method( 'lc' )
|
||||
$lang->method( 'lc' )
|
||||
->willReturnCallback( static function ( $x ) {
|
||||
return mb_strtolower( $x );
|
||||
} );
|
||||
|
|
333
tests/phpunit/unit/Parser/SyntaxCheckerTest.php
Normal file
333
tests/phpunit/unit/Parser/SyntaxCheckerTest.php
Normal file
|
@ -0,0 +1,333 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*
|
||||
* @license GPL-2.0-or-later
|
||||
*/
|
||||
|
||||
namespace MediaWiki\Extension\AbuseFilter\Tests\Unit\Parser;
|
||||
|
||||
use EmptyBagOStuff;
|
||||
use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner;
|
||||
use MediaWiki\Extension\AbuseFilter\KeywordsManager;
|
||||
use MediaWiki\Extension\AbuseFilter\Parser\AFPTreeParser;
|
||||
use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException;
|
||||
use MediaWiki\Extension\AbuseFilter\Parser\SyntaxChecker;
|
||||
use NullStatsdDataFactory;
|
||||
use Psr\Log\NullLogger;
|
||||
|
||||
/**
 * @group Test
 * @group AbuseFilter
 * @group AbuseFilterParser
 *
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\SyntaxChecker
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AbuseFilterCachingParser
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPTreeParser
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPTransitionBase
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPTreeNode
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPSyntaxTree
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPParserState
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AbuseFilterTokenizer
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPToken
 * @covers \MediaWiki\Extension\AbuseFilter\Parser\AFPData
 */
class SyntaxCheckerTest extends \MediaWikiUnitTestCase {
	/**
	 * Parse $expr into a tree, run a SyntaxChecker over it and compare the outcome
	 * with the expectation. Only exceptions thrown by the checker itself are
	 * compared; an exception thrown while parsing propagates to the caller.
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @param string $mode The checking mode
	 * @param bool $checkUnusedVars Whether unused variables should error
	 */
	private function exceptionTest( string $excep, string $expr, string $mode, bool $checkUnusedVars ): void {
		$expectException = $excep !== '';
		// var_export() is used because interpolating a bool renders false as an empty string
		$info = " mode=$mode checkUnused=" . var_export( $checkUnusedVars, true );

		$cache = new EmptyBagOStuff();
		$logger = new NullLogger();
		$statsd = new NullStatsdDataFactory();
		$keywordsManager = new KeywordsManager( $this->createMock( AbuseFilterHookRunner::class ) );

		$parser = new AFPTreeParser( $cache, $logger, $statsd, $keywordsManager );
		$tree = $parser->parse( $expr );
		$checker = new SyntaxChecker( $tree, $keywordsManager, $mode, $checkUnusedVars );
		try {
			$checker->start();
		} catch ( UserVisibleException $e ) {
			if ( $expectException ) {
				// Exception IDs are strings, so compare them strictly
				$this->assertSame( $excep, $e->mExceptionID, "Got wrong exception type: $info" );
				return;
			}
			$this->fail( "Unexpected exception $e thrown: $info" );
		}
		$this->assertFalse( $expectException, "Exception $excep not thrown: $info" );
	}

	/**
	 * Test the arity-related exception
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @dataProvider provideArity
	 */
	public function testArity( string $excep, string $expr ): void {
		$this->exceptionTest( $excep, $expr, SyntaxChecker::MCONSERVATIVE, false );
	}

	/**
	 * Data provider for testArity
	 *
	 * @return array
	 */
	public function provideArity(): array {
		return [
			[ 'toomanyargs', 'length(1, 2)' ],
			[ 'noparams', 'length()' ],
			[ 'notenoughargs', 'contains_any(1)' ],
			[ '', 'length(1)' ],
			[ '', 'contains_any(1, 2)' ],
			[ '', 'contains_any(1, 2, 3)' ],
		];
	}

	/**
	 * Test the function name related exception
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @dataProvider provideFunctionName
	 */
	public function testFunctionName( string $excep, string $expr ): void {
		$this->exceptionTest( $excep, $expr, SyntaxChecker::MCONSERVATIVE, false );
	}

	/**
	 * Data provider for testFunctionName
	 *
	 * @return array
	 */
	public function provideFunctionName(): array {
		return [
			[ 'unknownfunction', 'f(1)' ],
			[ 'unknownfunction', 'timestamp(1)' ],
			[ '', 'length(1)' ],
		];
	}

	/**
	 * Test the assignment related exception
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @dataProvider provideAssign
	 */
	public function testAssign( string $excep, string $expr ): void {
		$this->exceptionTest( $excep, $expr, SyntaxChecker::MCONSERVATIVE, false );
	}

	/**
	 * Data provider for testAssign
	 *
	 * @return array
	 */
	public function provideAssign(): array {
		// array assignments need to lookup first, so error will differ.
		return [
			[ 'overridebuiltin', 'timestamp := 1' ],
			[ 'overridebuiltin', 'length := 1' ],
			[ '', 'f := 1' ],

			// lookup is fine. assignment is not.
			[ 'overridebuiltin', 'timestamp[] := 1' ],

			// lookup is not fine.
			[ 'usebuiltin', 'length[] := 1' ],

			// lookup is not fine.
			[ 'unrecognisedvar', 'f[] := 1' ],

			// lookup is fine. assignment is not.
			[ 'overridebuiltin', 'timestamp[0] := 1' ],

			// lookup is not fine.
			[ 'usebuiltin', 'length[0] := 1' ],

			// lookup is not fine.
			[ 'unrecognisedvar', 'f[0] := 1' ],

			// static checker currently does not check index error
			[ '', 'f := 1; f[] := 1' ],
			[ '', 'f := 1; f[0] := 1' ],
		];
	}

	/**
	 * Test the lookup related exception
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @dataProvider provideLookup
	 */
	public function testLookup( string $excep, string $expr ): void {
		$this->exceptionTest( $excep, $expr, SyntaxChecker::MCONSERVATIVE, false );
	}

	/**
	 * Data provider for testLookup
	 *
	 * @return array
	 */
	public function provideLookup(): array {
		return [
			[ '', 'timestamp' ],
			[ 'usebuiltin', 'length' ],
			[ 'unrecognisedvar', 'f' ],
			[ '', 'f := 1; f' ]
		];
	}

	/**
	 * Test mode-related exception where both modes differ
	 *
	 * @param string $expr The expression to test
	 * @param string $mode The mode to test
	 * @dataProvider provideModeDiffer
	 */
	public function testModeDiffer( string $expr, string $mode ): void {
		// conservative mode is supposed to pass.
		// liberal mode is supposed to fail.
		$this->exceptionTest(
			$mode === SyntaxChecker::MCONSERVATIVE ? '' : 'unrecognisedvar',
			$expr,
			$mode,
			false
		);
	}

	/**
	 * Data provider for testModeDiffer. Every expression is emitted twice,
	 * once per checking mode.
	 *
	 * @return array
	 */
	public function provideModeDiffer(): array {
		$testSketches = [
			// and
			'(false & (a := 1)); a',
			'(true & (a := 1)); a',
			// or
			'(true | (a := 1)); a',
			'(false | (a := 1)); a',
			// if
			'if 1 then (a := 1) else 1 end; a',
			'if 1 then 1 else (a := 1) end; a',
		];
		$tests = [];
		foreach ( $testSketches as $test ) {
			$tests[] = [ $test, SyntaxChecker::MCONSERVATIVE ];
			$tests[] = [ $test, SyntaxChecker::MLIBERAL ];
		}
		return $tests;
	}

	/**
	 * Test mode-related exception where both modes agree
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @param string $mode The mode to test
	 * @dataProvider provideModeAgree
	 */
	public function testModeAgree( string $excep, string $expr, string $mode ): void {
		$this->exceptionTest( $excep, $expr, $mode, false );
	}

	/**
	 * Data provider for testModeAgree. Each sketch is [ passes, expression ];
	 * a failing sketch is expected to raise 'unrecognisedvar' in both modes.
	 *
	 * @return array
	 */
	public function provideModeAgree(): array {
		$testSketches = [
			// pass tests
			// and
			[ true, '((a := 1) & 0); a' ],
			// or
			[ true, '((a := 1) | 0); a' ],
			// if
			[ true, 'if 1 then (a := 1) else (a := 1) end; a' ],
			[ true, 'if (a := 1) then 1 else 1 end; a' ],

			// fail tests
			// and
			[ false, 'false & a' ],
			// or
			[ false, 'true | a' ],
			// if
			[ false, 'if true then 1 else a end' ],
			[ false, 'if false then a else 1 end' ],
			[ false, 'if a then 1 else 1 end' ],
		];
		$tests = [];
		foreach ( $testSketches as $test ) {
			$excep = $test[0] ? '' : 'unrecognisedvar';
			$tests[] = [ $excep, $test[1], SyntaxChecker::MCONSERVATIVE ];
			$tests[] = [ $excep, $test[1], SyntaxChecker::MLIBERAL ];
		}
		return $tests;
	}

	/**
	 * Test unused variable
	 *
	 * @param string $excep The expected exception or an empty string
	 * @param string $expr The expression to test
	 * @dataProvider provideUnusedVars
	 */
	public function testUnusedVars( string $excep, string $expr ): void {
		$this->exceptionTest( $excep, $expr, SyntaxChecker::MCONSERVATIVE, true );
	}

	/**
	 * Data provider for testUnusedVars
	 *
	 * @return array
	 */
	public function provideUnusedVars(): array {
		return [
			[ '', 'a := 1; a' ],

			// even though the first a is not used, we allow it to prevent
			// too many false positives. Note that setting a variable to null
			// and then mutating it to something else without ever reading
			// it is a pretty common idiom, which is another reason
			// we don't want to error on this.
			[ '', 'a := 1; a := 1; a' ],

			[ '', 'a := 1; b := a; b' ],

			// eventually a is not used.
			[ 'unusedvars', 'a := 1' ],

			// eventually a is not used.
			[ 'unusedvars', 'a := 1; a := 1' ],

			// eventually a is not used.
			[ 'unusedvars', 'a := 1; b := a; a := b' ],
		];
	}
}
|
Loading…
Reference in a new issue