mediawiki-extensions-AbuseF.../includes/parser/AFPTreeParser.php
Daimona Eaytoy 71730f7d44 Warn if a function has been given too many parameters
While this is not as important as throwing for too few parameters, IMHO
it's still important to fail in this case. Mostly because if a function
receives too many parameters, chances are that who wrote the filter
didn't do that intendedly, and thus there may be a hidden bug.
Bonus: fix a few docblocks.

Bug: T230803
Change-Id: Iac2931f17b50ace8c8f4c2faa44b3f54ca134c54
2019-08-26 20:29:49 +02:00

732 lines
18 KiB
PHP

<?php
/**
* A version of the abuse filter parser that separates parsing the filter and
* evaluating it into different passes, allowing the parse tree to be cached.
*
* @file
*/
use MediaWiki\Logger\LoggerFactory;
/**
* A parser that transforms the text of the filter into a parse tree.
*/
class AFPTreeParser {
/**
* @var array[] Contains the AFPTokens for the code being parsed
*/
public $mTokens;
/**
* @var AFPToken The current token
*/
public $mCur;
/**
* @var int The position of the current token
*/
public $mPos;
/**
* @var string|null The ID of the filter being parsed, if available. Can also be "global-$ID"
*/
protected $mFilter;
const CACHE_VERSION = 2;
/**
* @var bool[] Custom variable names, set of [ name => true ]
*/
private $variablesNames;
/**
* Create a new instance
*/
public function __construct() {
$this->resetState();
}
/**
* @param string $filter
*/
public function setFilter( $filter ) {
$this->mFilter = $filter;
}
/**
* Resets the state
*/
public function resetState() {
$this->mTokens = [];
$this->mPos = 0;
$this->variablesNames = [];
$this->mFilter = null;
}
/**
* Advances the parser to the next token in the filter code.
*/
protected function move() {
list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos];
}
/**
* Get the next token. This is similar to move() but doesn't change class members,
* allowing to look ahead without rolling back the state.
*
* @return AFPToken
*/
protected function getNextToken() {
return $this->mTokens[$this->mPos][0];
}
/**
* getState() function allows parser state to be rollbacked to several tokens
* back.
*
* @return AFPParserState
*/
protected function getState() {
return new AFPParserState( $this->mCur, $this->mPos );
}
/**
* setState() function allows parser state to be rollbacked to several tokens
* back.
*
* @param AFPParserState $state
*/
protected function setState( AFPParserState $state ) {
$this->mCur = $state->token;
$this->mPos = $state->pos;
}
/**
* Parse the supplied filter source code into a tree.
*
* @param string $code
* @throws AFPUserVisibleException
* @return AFPSyntaxTree
*/
public function parse( $code ) : AFPSyntaxTree {
$this->mTokens = AbuseFilterTokenizer::getTokens( $code );
$this->mPos = 0;
return $this->buildSyntaxTree();
}
/**
* @return AFPSyntaxTree
*/
public function buildSyntaxTree() : AFPSyntaxTree {
$root = $this->doLevelEntry();
$variables = array_keys( $this->variablesNames );
return new AFPSyntaxTree( $variables, $root );
}
/* Levels */
/**
* Handles unexpected characters after the expression.
* @return AFPTreeNode|null Null only if no statements
* @throws AFPUserVisibleException
*/
protected function doLevelEntry() {
$result = $this->doLevelSemicolon();
if ( $this->mCur->type !== AFPToken::TNONE ) {
throw new AFPUserVisibleException(
'unexpectedatend',
$this->mPos, [ $this->mCur->type ]
);
}
return $result;
}
/**
* Handles the semicolon operator.
*
* @return AFPTreeNode|null
*/
protected function doLevelSemicolon() {
$statements = [];
do {
$this->move();
$position = $this->mPos;
if (
$this->mCur->type === AFPToken::TNONE ||
( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value == ')' )
) {
// Handle special cases which the other parser handled in doLevelAtom
break;
}
// Allow empty statements.
if ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR ) {
continue;
}
$statements[] = $this->doLevelSet();
$position = $this->mPos;
} while ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR );
// Flatten the tree if possible.
if ( count( $statements ) === 0 ) {
return null;
} elseif ( count( $statements ) === 1 ) {
return $statements[0];
} else {
return new AFPTreeNode( AFPTreeNode::SEMICOLON, $statements, $position );
}
}
/**
* Handles variable assignment.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelSet() {
if ( $this->mCur->type === AFPToken::TID ) {
$varname = $this->mCur->value;
// Speculatively parse the assignment statement assuming it can
// potentially be an assignment, but roll back if it isn't.
$initialState = $this->getState();
$this->move();
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) {
$position = $this->mPos;
$this->move();
$value = $this->doLevelSet();
$this->variablesNames[ $varname ] = true;
return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position );
}
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) {
$this->move();
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
$index = 'append';
} else {
// Parse index offset.
$this->setState( $initialState );
$this->move();
$index = $this->doLevelSemicolon();
if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) {
throw new AFPUserVisibleException( 'expectednotfound', $this->mPos,
[ ']', $this->mCur->type, $this->mCur->value ] );
}
}
$this->move();
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) {
$position = $this->mPos;
$this->move();
$value = $this->doLevelSet();
$this->variablesNames[ $varname ] = true;
if ( $index === 'append' ) {
return new AFPTreeNode(
AFPTreeNode::ARRAY_APPEND, [ $varname, $value ], $position );
} else {
return new AFPTreeNode(
AFPTreeNode::INDEX_ASSIGNMENT,
[ $varname, $index, $value ],
$position
);
}
}
}
// If we reached this point, we did not find an assignment. Roll back
// and assume this was just a literal.
$this->setState( $initialState );
}
return $this->doLevelConditions();
}
/**
* Handles ternary operator and if-then-else-end.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelConditions() {
if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'if' ) {
$position = $this->mPos;
$this->move();
$condition = $this->doLevelBoolOps();
if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'then' ) ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
'then',
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
$valueIfTrue = $this->doLevelConditions();
if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'else' ) {
$this->move();
$valueIfFalse = $this->doLevelConditions();
} else {
$valueIfFalse = null;
}
if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'end' ) ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
'end',
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
return new AFPTreeNode(
AFPTreeNode::CONDITIONAL,
[ $condition, $valueIfTrue, $valueIfFalse ],
$position
);
}
$condition = $this->doLevelBoolOps();
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '?' ) {
$position = $this->mPos;
$this->move();
$valueIfTrue = $this->doLevelConditions();
if ( !( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':' ) ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
':',
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
$valueIfFalse = $this->doLevelConditions();
return new AFPTreeNode(
AFPTreeNode::CONDITIONAL,
[ $condition, $valueIfTrue, $valueIfFalse ],
$position
);
}
return $condition;
}
/**
* Handles logic operators.
*
* @return AFPTreeNode
*/
protected function doLevelBoolOps() {
$leftOperand = $this->doLevelCompares();
$ops = [ '&', '|', '^' ];
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
$op = $this->mCur->value;
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelCompares();
$leftOperand = new AFPTreeNode(
AFPTreeNode::LOGIC,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles comparison operators.
*
* @return AFPTreeNode
*/
protected function doLevelCompares() {
$leftOperand = $this->doLevelSumRels();
$equalityOps = [ '==', '===', '!=', '!==', '=' ];
$orderOps = [ '<', '>', '<=', '>=' ];
// Only allow either a single operation, or a combination of a single equalityOps and a single
// orderOps. This resembles what PHP does, and allows `a < b == c` while rejecting `a < b < c`
$allowedOps = array_merge( $equalityOps, $orderOps );
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $allowedOps ) ) {
$op = $this->mCur->value;
$allowedOps = in_array( $op, $equalityOps ) ?
array_diff( $allowedOps, $equalityOps ) :
array_diff( $allowedOps, $orderOps );
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelSumRels();
$leftOperand = new AFPTreeNode(
AFPTreeNode::COMPARE,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handle addition and subtraction.
*
* @return AFPTreeNode
*/
protected function doLevelSumRels() {
$leftOperand = $this->doLevelMulRels();
$ops = [ '+', '-' ];
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
$op = $this->mCur->value;
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelMulRels();
$leftOperand = new AFPTreeNode(
AFPTreeNode::SUM_REL,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles multiplication and division.
*
* @return AFPTreeNode
*/
protected function doLevelMulRels() {
$leftOperand = $this->doLevelPow();
$ops = [ '*', '/', '%' ];
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
$op = $this->mCur->value;
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelPow();
$leftOperand = new AFPTreeNode(
AFPTreeNode::MUL_REL,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles exponentiation.
*
* @return AFPTreeNode
*/
protected function doLevelPow() {
$base = $this->doLevelBoolInvert();
while ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '**' ) {
$position = $this->mPos;
$this->move();
$exponent = $this->doLevelBoolInvert();
$base = new AFPTreeNode( AFPTreeNode::POW, [ $base, $exponent ], $position );
}
return $base;
}
/**
* Handles boolean inversion.
*
* @return AFPTreeNode
*/
protected function doLevelBoolInvert() {
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '!' ) {
$position = $this->mPos;
$this->move();
$argument = $this->doLevelKeywordOperators();
return new AFPTreeNode( AFPTreeNode::BOOL_INVERT, [ $argument ], $position );
}
return $this->doLevelKeywordOperators();
}
/**
* Handles keyword operators.
*
* @return AFPTreeNode
*/
protected function doLevelKeywordOperators() {
$leftOperand = $this->doLevelUnarys();
$keyword = strtolower( $this->mCur->value );
if ( $this->mCur->type === AFPToken::TKEYWORD &&
isset( AbuseFilterParser::$mKeywords[$keyword] )
) {
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelUnarys();
return new AFPTreeNode(
AFPTreeNode::KEYWORD_OPERATOR,
[ $keyword, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles unary operators.
*
* @return AFPTreeNode
*/
protected function doLevelUnarys() {
$op = $this->mCur->value;
if ( $this->mCur->type === AFPToken::TOP && ( $op === "+" || $op === "-" ) ) {
$position = $this->mPos;
$this->move();
$argument = $this->doLevelArrayElements();
return new AFPTreeNode( AFPTreeNode::UNARY, [ $op, $argument ], $position );
}
return $this->doLevelArrayElements();
}
/**
* Handles accessing an array element by an offset.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelArrayElements() {
$array = $this->doLevelParenthesis();
while ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) {
$position = $this->mPos;
$index = $this->doLevelSemicolon();
$array = new AFPTreeNode( AFPTreeNode::ARRAY_INDEX, [ $array, $index ], $position );
if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) {
throw new AFPUserVisibleException( 'expectednotfound', $this->mPos,
[ ']', $this->mCur->type, $this->mCur->value ] );
}
$this->move();
}
return $array;
}
/**
* Handles parenthesis.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelParenthesis() {
if ( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value === '(' ) {
$result = $this->doLevelSemicolon();
if ( !( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value === ')' ) ) {
throw new AFPUserVisibleException(
'expectednotfound',
$this->mPos,
[ ')', $this->mCur->type, $this->mCur->value ]
);
}
$this->move();
return $result;
}
return $this->doLevelFunction();
}
/**
* Handles function calls.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelFunction() {
if ( $this->mCur->type === AFPToken::TID &&
isset( AbuseFilterParser::$mFunctions[$this->mCur->value] )
) {
$func = $this->mCur->value;
$position = $this->mPos;
$this->move();
if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== '(' ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
'(',
$this->mCur->type,
$this->mCur->value
]
);
}
if ( ( $func === 'set' || $func === 'set_var' ) ) {
$state = $this->getState();
$this->move();
$next = $this->getNextToken();
if (
$this->mCur->type !== AFPToken::TSTRING ||
(
$next->type !== AFPToken::TCOMMA &&
// Let this fail later, when checking parameters count
!( $next->type === AFPToken::TBRACE && $next->value === ')' )
)
) {
throw new AFPUserVisibleException( 'variablevariable', $this->mPos, [] );
} else {
$this->variablesNames[ $this->mCur->value ] = true;
$this->setState( $state );
}
}
$args = [];
$next = $this->getNextToken();
if ( $next->type !== AFPToken::TBRACE || $next->value !== ')' ) {
do {
$args[] = $this->doLevelSemicolon();
} while ( $this->mCur->type === AFPToken::TCOMMA );
} else {
$this->move();
}
if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== ')' ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
')',
$this->mCur->type,
$this->mCur->value
]
);
}
// Giving too few arguments to a function is a pretty common error. If we check it here
// (as well as at runtime, for OCD), we can make checkSyntax only try to build the AST, as
// there would be way less runtime errors. Moreover, this check will also be performed inside
// skipped branches, e.g. the discarded if/else branch.
$this->checkArgCount( $args, $func );
$this->move();
array_unshift( $args, $func );
return new AFPTreeNode( AFPTreeNode::FUNCTION_CALL, $args, $position );
}
return $this->doLevelAtom();
}
/**
* Handle literals.
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelAtom() {
$tok = $this->mCur->value;
switch ( $this->mCur->type ) {
case AFPToken::TID:
case AFPToken::TSTRING:
case AFPToken::TFLOAT:
case AFPToken::TINT:
$result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos );
break;
case AFPToken::TKEYWORD:
if ( in_array( $tok, [ "true", "false", "null" ] ) ) {
$result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos );
break;
}
throw new AFPUserVisibleException(
'unrecognisedkeyword',
$this->mPos,
[ $tok ]
);
/** @noinspection PhpMissingBreakStatementInspection */
case AFPToken::TSQUAREBRACKET:
if ( $this->mCur->value === '[' ) {
$array = [];
while ( true ) {
$this->move();
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
break;
}
$array[] = $this->doLevelSet();
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
break;
}
if ( $this->mCur->type !== AFPToken::TCOMMA ) {
throw new AFPUserVisibleException(
'expectednotfound',
$this->mPos,
[ ', or ]', $this->mCur->type, $this->mCur->value ]
);
}
}
$result = new AFPTreeNode( AFPTreeNode::ARRAY_DEFINITION, $array, $this->mPos );
break;
}
// Fallthrough expected
default:
throw new AFPUserVisibleException(
'unexpectedtoken',
$this->mPos,
[
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
return $result;
}
/**
* Check that a built-in function has been provided the right amount of arguments
*
* @param array $args The arguments supplied to the function
* @param string $func The function name
* @throws AFPUserVisibleException
* @see AbuseFilterParser::checkArgCount()
* @todo This is a duplicate of AbuseFilter::checkEnoughArguments, and such duplication
* should be avoided when merging the parsers.
*/
protected function checkArgCount( $args, $func ) {
$logger = LoggerFactory::getInstance( 'AbuseFilter' );
if ( !array_key_exists( $func, AbuseFilterParser::$funcArgCount ) ) {
throw new InvalidArgumentException( "$func is not a valid function." );
}
list( $min, $max ) = AbuseFilterParser::$funcArgCount[ $func ];
if ( count( $args ) < $min ) {
throw new AFPUserVisibleException(
$min === 1 ? 'noparams' : 'notenoughargs',
$this->mCur->pos,
[ $func, $min, count( $args ) ]
);
} elseif ( count( $args ) > $max ) {
$logger->warning(
"Too many params to $func for filter: " . ( $this->mFilter ?? 'unavailable' )
);
/*
@todo Uncomment after fixing filters in WMF production
throw new AFPUserVisibleException(
'toomanyargs',
$this->mCur->pos,
[ $func, $max, count( $args ) ]
);
*/
}
}
}