mediawiki-extensions-AbuseF.../includes/parser/AFPTreeParser.php
Daimona Eaytoy af7744781f Allow if without else
Bug: T230727
Depends-On: I8e7f7710b8cb37ada8531b631456a3ce7b27ee45
Change-Id: I3b85087677607573f4fa68681735dc35348dcd87
2019-08-20 19:36:14 +00:00

680 lines
17 KiB
PHP

<?php
/**
* A version of the abuse filter parser that separates parsing the filter and
* evaluating it into different passes, allowing the parse tree to be cached.
*
* @file
*/
/**
* A parser that transforms the text of the filter into a parse tree.
*/
class AFPTreeParser {
// The tokenized representation of the filter parsed.
public $mTokens;
// Current token handled by the parser and its position.
public $mCur, $mPos;
const CACHE_VERSION = 2;
/**
* Create a new instance
*/
public function __construct() {
$this->resetState();
}
/**
* Resets the state
*/
public function resetState() {
$this->mTokens = [];
$this->mPos = 0;
}
/**
* Advances the parser to the next token in the filter code.
*/
protected function move() {
list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos];
}
/**
* Get the next token. This is similar to move() but doesn't change class members,
* allowing to look ahead without rolling back the state.
*
* @return AFPToken
*/
protected function getNextToken() {
return $this->mTokens[$this->mPos][0];
}
/**
* getState() function allows parser state to be rollbacked to several tokens
* back.
*
* @return AFPParserState
*/
protected function getState() {
return new AFPParserState( $this->mCur, $this->mPos );
}
/**
* setState() function allows parser state to be rollbacked to several tokens
* back.
*
* @param AFPParserState $state
*/
protected function setState( AFPParserState $state ) {
$this->mCur = $state->token;
$this->mPos = $state->pos;
}
/**
* Parse the supplied filter source code into a tree.
*
* @param string $code
* @throws AFPUserVisibleException
* @return AFPTreeNode|null Null only if no statements
*/
public function parse( $code ) {
$this->mTokens = AbuseFilterTokenizer::getTokens( $code );
$this->mPos = 0;
return $this->doLevelEntry();
}
/* Levels */
/**
* Handles unexpected characters after the expression.
* @return AFPTreeNode|null
* @throws AFPUserVisibleException
*/
protected function doLevelEntry() {
$result = $this->doLevelSemicolon();
if ( $this->mCur->type !== AFPToken::TNONE ) {
throw new AFPUserVisibleException(
'unexpectedatend',
$this->mPos, [ $this->mCur->type ]
);
}
return $result;
}
/**
* Handles the semicolon operator.
*
* @return AFPTreeNode|null
*/
protected function doLevelSemicolon() {
$statements = [];
do {
$this->move();
$position = $this->mPos;
if (
$this->mCur->type === AFPToken::TNONE ||
( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value == ')' )
) {
// Handle special cases which the other parser handled in doLevelAtom
break;
}
// Allow empty statements.
if ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR ) {
continue;
}
$statements[] = $this->doLevelSet();
$position = $this->mPos;
} while ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR );
// Flatten the tree if possible.
if ( count( $statements ) === 0 ) {
return null;
} elseif ( count( $statements ) === 1 ) {
return $statements[0];
} else {
return new AFPTreeNode( AFPTreeNode::SEMICOLON, $statements, $position );
}
}
/**
* Handles variable assignment.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelSet() {
if ( $this->mCur->type === AFPToken::TID ) {
$varname = $this->mCur->value;
// Speculatively parse the assignment statement assuming it can
// potentially be an assignment, but roll back if it isn't.
$initialState = $this->getState();
$this->move();
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) {
$position = $this->mPos;
$this->move();
$value = $this->doLevelSet();
return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position );
}
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) {
$this->move();
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
$index = 'append';
} else {
// Parse index offset.
$this->setState( $initialState );
$this->move();
$index = $this->doLevelSemicolon();
if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) {
throw new AFPUserVisibleException( 'expectednotfound', $this->mPos,
[ ']', $this->mCur->type, $this->mCur->value ] );
}
}
$this->move();
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) {
$position = $this->mPos;
$this->move();
$value = $this->doLevelSet();
if ( $index === 'append' ) {
return new AFPTreeNode(
AFPTreeNode::ARRAY_APPEND, [ $varname, $value ], $position );
} else {
return new AFPTreeNode(
AFPTreeNode::INDEX_ASSIGNMENT,
[ $varname, $index, $value ],
$position
);
}
}
}
// If we reached this point, we did not find an assignment. Roll back
// and assume this was just a literal.
$this->setState( $initialState );
}
return $this->doLevelConditions();
}
/**
* Handles ternary operator and if-then-else-end.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelConditions() {
if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'if' ) {
$position = $this->mPos;
$this->move();
$condition = $this->doLevelBoolOps();
if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'then' ) ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
'then',
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
$valueIfTrue = $this->doLevelConditions();
if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'else' ) {
$this->move();
$valueIfFalse = $this->doLevelConditions();
} else {
$valueIfFalse = null;
}
if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'end' ) ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
'end',
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
return new AFPTreeNode(
AFPTreeNode::CONDITIONAL,
[ $condition, $valueIfTrue, $valueIfFalse ],
$position
);
}
$condition = $this->doLevelBoolOps();
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '?' ) {
$position = $this->mPos;
$this->move();
$valueIfTrue = $this->doLevelConditions();
if ( !( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':' ) ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
':',
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
$valueIfFalse = $this->doLevelConditions();
return new AFPTreeNode(
AFPTreeNode::CONDITIONAL,
[ $condition, $valueIfTrue, $valueIfFalse ],
$position
);
}
return $condition;
}
/**
* Handles logic operators.
*
* @return AFPTreeNode
*/
protected function doLevelBoolOps() {
$leftOperand = $this->doLevelCompares();
$ops = [ '&', '|', '^' ];
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
$op = $this->mCur->value;
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelCompares();
$leftOperand = new AFPTreeNode(
AFPTreeNode::LOGIC,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles comparison operators.
*
* @return AFPTreeNode
*/
protected function doLevelCompares() {
$leftOperand = $this->doLevelSumRels();
$equalityOps = [ '==', '===', '!=', '!==', '=' ];
$orderOps = [ '<', '>', '<=', '>=' ];
// Only allow either a single operation, or a combination of a single equalityOps and a single
// orderOps. This resembles what PHP does, and allows `a < b == c` while rejecting `a < b < c`
$allowedOps = array_merge( $equalityOps, $orderOps );
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $allowedOps ) ) {
$op = $this->mCur->value;
$allowedOps = in_array( $op, $equalityOps ) ?
array_diff( $allowedOps, $equalityOps ) :
array_diff( $allowedOps, $orderOps );
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelSumRels();
$leftOperand = new AFPTreeNode(
AFPTreeNode::COMPARE,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handle addition and subtraction.
*
* @return AFPTreeNode
*/
protected function doLevelSumRels() {
$leftOperand = $this->doLevelMulRels();
$ops = [ '+', '-' ];
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
$op = $this->mCur->value;
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelMulRels();
$leftOperand = new AFPTreeNode(
AFPTreeNode::SUM_REL,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles multiplication and division.
*
* @return AFPTreeNode
*/
protected function doLevelMulRels() {
$leftOperand = $this->doLevelPow();
$ops = [ '*', '/', '%' ];
while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
$op = $this->mCur->value;
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelPow();
$leftOperand = new AFPTreeNode(
AFPTreeNode::MUL_REL,
[ $op, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles exponentiation.
*
* @return AFPTreeNode
*/
protected function doLevelPow() {
$base = $this->doLevelBoolInvert();
while ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '**' ) {
$position = $this->mPos;
$this->move();
$exponent = $this->doLevelBoolInvert();
$base = new AFPTreeNode( AFPTreeNode::POW, [ $base, $exponent ], $position );
}
return $base;
}
/**
* Handles boolean inversion.
*
* @return AFPTreeNode
*/
protected function doLevelBoolInvert() {
if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '!' ) {
$position = $this->mPos;
$this->move();
$argument = $this->doLevelKeywordOperators();
return new AFPTreeNode( AFPTreeNode::BOOL_INVERT, [ $argument ], $position );
}
return $this->doLevelKeywordOperators();
}
/**
* Handles keyword operators.
*
* @return AFPTreeNode
*/
protected function doLevelKeywordOperators() {
$leftOperand = $this->doLevelUnarys();
$keyword = strtolower( $this->mCur->value );
if ( $this->mCur->type === AFPToken::TKEYWORD &&
isset( AbuseFilterParser::$mKeywords[$keyword] )
) {
$position = $this->mPos;
$this->move();
$rightOperand = $this->doLevelUnarys();
return new AFPTreeNode(
AFPTreeNode::KEYWORD_OPERATOR,
[ $keyword, $leftOperand, $rightOperand ],
$position
);
}
return $leftOperand;
}
/**
* Handles unary operators.
*
* @return AFPTreeNode
*/
protected function doLevelUnarys() {
$op = $this->mCur->value;
if ( $this->mCur->type === AFPToken::TOP && ( $op === "+" || $op === "-" ) ) {
$position = $this->mPos;
$this->move();
$argument = $this->doLevelArrayElements();
return new AFPTreeNode( AFPTreeNode::UNARY, [ $op, $argument ], $position );
}
return $this->doLevelArrayElements();
}
/**
* Handles accessing an array element by an offset.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelArrayElements() {
$array = $this->doLevelParenthesis();
while ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) {
$position = $this->mPos;
$index = $this->doLevelSemicolon();
$array = new AFPTreeNode( AFPTreeNode::ARRAY_INDEX, [ $array, $index ], $position );
if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) {
throw new AFPUserVisibleException( 'expectednotfound', $this->mPos,
[ ']', $this->mCur->type, $this->mCur->value ] );
}
$this->move();
}
return $array;
}
/**
* Handles parenthesis.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelParenthesis() {
if ( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value === '(' ) {
$result = $this->doLevelSemicolon();
if ( !( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value === ')' ) ) {
throw new AFPUserVisibleException(
'expectednotfound',
$this->mPos,
[ ')', $this->mCur->type, $this->mCur->value ]
);
}
$this->move();
return $result;
}
return $this->doLevelFunction();
}
/**
* Handles function calls.
*
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelFunction() {
if ( $this->mCur->type === AFPToken::TID &&
isset( AbuseFilterParser::$mFunctions[$this->mCur->value] )
) {
$func = $this->mCur->value;
$position = $this->mPos;
$this->move();
if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== '(' ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
'(',
$this->mCur->type,
$this->mCur->value
]
);
}
if ( ( $func === 'set' || $func === 'set_var' ) ) {
$state = $this->getState();
$this->move();
$next = $this->getNextToken();
if (
$this->mCur->type !== AFPToken::TSTRING ||
(
$next->type !== AFPToken::TCOMMA &&
// Let this fail later, when checking parameters count
!( $next->type === AFPToken::TBRACE && $next->value === ')' )
)
) {
throw new AFPUserVisibleException( 'variablevariable', $this->mPos, [] );
} else {
$this->setState( $state );
}
}
$args = [];
$next = $this->getNextToken();
if ( $next->type !== AFPToken::TBRACE || $next->value !== ')' ) {
do {
$args[] = $this->doLevelSemicolon();
} while ( $this->mCur->type === AFPToken::TCOMMA );
} else {
$this->move();
}
if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== ')' ) {
throw new AFPUserVisibleException( 'expectednotfound',
$this->mPos,
[
')',
$this->mCur->type,
$this->mCur->value
]
);
}
// Giving too few arguments to a function is a pretty common error. If we check it here
// (as well as at runtime, for OCD), we can make checkSyntax only try to build the AST, as
// there would be way less runtime errors. Moreover, this check will also be performed inside
// skipped branches, e.g. the discarded if/else branch.
$this->checkEnoughArguments( $args, $func );
$this->move();
array_unshift( $args, $func );
return new AFPTreeNode( AFPTreeNode::FUNCTION_CALL, $args, $position );
}
return $this->doLevelAtom();
}
/**
* Handle literals.
* @return AFPTreeNode
* @throws AFPUserVisibleException
*/
protected function doLevelAtom() {
$tok = $this->mCur->value;
switch ( $this->mCur->type ) {
case AFPToken::TID:
case AFPToken::TSTRING:
case AFPToken::TFLOAT:
case AFPToken::TINT:
$result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos );
break;
case AFPToken::TKEYWORD:
if ( in_array( $tok, [ "true", "false", "null" ] ) ) {
$result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos );
break;
}
throw new AFPUserVisibleException(
'unrecognisedkeyword',
$this->mPos,
[ $tok ]
);
/** @noinspection PhpMissingBreakStatementInspection */
case AFPToken::TSQUAREBRACKET:
if ( $this->mCur->value === '[' ) {
$array = [];
while ( true ) {
$this->move();
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
break;
}
$array[] = $this->doLevelSet();
if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
break;
}
if ( $this->mCur->type !== AFPToken::TCOMMA ) {
throw new AFPUserVisibleException(
'expectednotfound',
$this->mPos,
[ ', or ]', $this->mCur->type, $this->mCur->value ]
);
}
}
$result = new AFPTreeNode( AFPTreeNode::ARRAY_DEFINITION, $array, $this->mPos );
break;
}
// Fallthrough expected
default:
throw new AFPUserVisibleException(
'unexpectedtoken',
$this->mPos,
[
$this->mCur->type,
$this->mCur->value
]
);
}
$this->move();
return $result;
}
/**
* Check that a built-in function has been provided enough arguments
*
* @param array $args The arguments supplied to the function
* @param string $func The function name
* @throws AFPUserVisibleException
* @see AbuseFilterParser::checkEnoughArguments()
* @todo This is a duplicate of AbuseFilter::checkEnoughArguments, and such duplication
* should be avoided when merging the parsers.
*/
protected function checkEnoughArguments( $args, $func ) {
if ( !array_key_exists( $func, AbuseFilterParser::$minArgFunc ) ) {
throw new InvalidArgumentException( "$func is not a valid function." );
}
$count = AbuseFilterParser::$minArgFunc[ $func ];
if ( count( $args ) < $count ) {
throw new AFPUserVisibleException(
$count === 1 ? 'noparams' : 'notenoughargs',
$this->mCur->pos,
[ $func, $count, count( $args ) ]
);
}
}
}