cache = $cache; $this->logger = $logger; $this->resetState(); } /** * @param string $filter */ public function setFilter( $filter ) { $this->mFilter = $filter; } /** * Resets the state */ public function resetState() { $this->mTokens = []; $this->mPos = 0; $this->mFilter = null; } /** * Advances the parser to the next token in the filter code. */ protected function move() { list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos]; } /** * Get the next token. This is similar to move() but doesn't change class members, * allowing to look ahead without rolling back the state. * * @return AFPToken */ protected function getNextToken() { return $this->mTokens[$this->mPos][0]; } /** * getState() function allows parser state to be rollbacked to several tokens * back. * * @return AFPParserState */ protected function getState() { return new AFPParserState( $this->mCur, $this->mPos ); } /** * setState() function allows parser state to be rollbacked to several tokens * back. * * @param AFPParserState $state */ protected function setState( AFPParserState $state ) { $this->mCur = $state->token; $this->mPos = $state->pos; } /** * Parse the supplied filter source code into a tree. * * @param string $code * @throws AFPUserVisibleException * @return AFPSyntaxTree */ public function parse( $code ) : AFPSyntaxTree { $tokenizer = new AbuseFilterTokenizer( $this->cache, $this->logger ); $this->mTokens = $tokenizer->getTokens( $code ); $this->mPos = 0; return $this->buildSyntaxTree(); } /** * @return AFPSyntaxTree */ public function buildSyntaxTree() : AFPSyntaxTree { $root = $this->doLevelEntry(); return new AFPSyntaxTree( $root ); } /* Levels */ /** * Handles unexpected characters after the expression. * @return AFPTreeNode|null Null only if no statements * @throws AFPUserVisibleException */ protected function doLevelEntry() { $result = $this->doLevelSemicolon(); if ( $this->mCur->type !== AFPToken::TNONE ) { throw new AFPUserVisibleException( 'unexpectedatend', $this->mPos, [ $this->mCur->type ] ); } return $result; } /** * Handles the semicolon operator. * * @return AFPTreeNode|null */ protected function doLevelSemicolon() { $statements = []; do { $this->move(); $position = $this->mPos; if ( $this->mCur->type === AFPToken::TNONE || ( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value == ')' ) ) { // Handle special cases which the other parser handled in doLevelAtom break; } // Allow empty statements. if ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR ) { continue; } $statements[] = $this->doLevelSet(); $position = $this->mPos; } while ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR ); // Flatten the tree if possible. if ( count( $statements ) === 0 ) { return null; } elseif ( count( $statements ) === 1 ) { return $statements[0]; } else { return new AFPTreeNode( AFPTreeNode::SEMICOLON, $statements, $position ); } } /** * Handles variable assignment. * * @return AFPTreeNode * @throws AFPUserVisibleException */ protected function doLevelSet() { if ( $this->mCur->type === AFPToken::TID ) { $varname = $this->mCur->value; // Speculatively parse the assignment statement assuming it can // potentially be an assignment, but roll back if it isn't. // @todo Use $this->getNextToken for clearer code $initialState = $this->getState(); $this->move(); if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) { $position = $this->mPos; $this->move(); $value = $this->doLevelSet(); return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position ); } if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) { $this->move(); if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) { $index = 'append'; } else { // Parse index offset. $this->setState( $initialState ); $this->move(); $index = $this->doLevelSemicolon(); if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ ']', $this->mCur->type, $this->mCur->value ] ); } } $this->move(); if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) { $position = $this->mPos; $this->move(); $value = $this->doLevelSet(); if ( $index === 'append' ) { return new AFPTreeNode( AFPTreeNode::ARRAY_APPEND, [ $varname, $value ], $position ); } else { return new AFPTreeNode( AFPTreeNode::INDEX_ASSIGNMENT, [ $varname, $index, $value ], $position ); } } } // If we reached this point, we did not find an assignment. Roll back // and assume this was just a literal. $this->setState( $initialState ); } return $this->doLevelConditions(); } /** * Handles ternary operator and if-then-else-end. * * @return AFPTreeNode * @throws AFPUserVisibleException */ protected function doLevelConditions() { if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'if' ) { $position = $this->mPos; $this->move(); $condition = $this->doLevelBoolOps(); if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'then' ) ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ 'then', $this->mCur->type, $this->mCur->value ] ); } $this->move(); $valueIfTrue = $this->doLevelConditions(); if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'else' ) { $this->move(); $valueIfFalse = $this->doLevelConditions(); } else { $valueIfFalse = null; } if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'end' ) ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ 'end', $this->mCur->type, $this->mCur->value ] ); } $this->move(); return new AFPTreeNode( AFPTreeNode::CONDITIONAL, [ $condition, $valueIfTrue, $valueIfFalse ], $position ); } $condition = $this->doLevelBoolOps(); if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '?' ) { $position = $this->mPos; $this->move(); $valueIfTrue = $this->doLevelConditions(); if ( !( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':' ) ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ ':', $this->mCur->type, $this->mCur->value ] ); } $this->move(); $valueIfFalse = $this->doLevelConditions(); return new AFPTreeNode( AFPTreeNode::CONDITIONAL, [ $condition, $valueIfTrue, $valueIfFalse ], $position ); } return $condition; } /** * Handles logic operators. * * @return AFPTreeNode */ protected function doLevelBoolOps() { $leftOperand = $this->doLevelCompares(); $ops = [ '&', '|', '^' ]; while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { $op = $this->mCur->value; $position = $this->mPos; $this->move(); $rightOperand = $this->doLevelCompares(); $leftOperand = new AFPTreeNode( AFPTreeNode::LOGIC, [ $op, $leftOperand, $rightOperand ], $position ); } return $leftOperand; } /** * Handles comparison operators. * * @return AFPTreeNode */ protected function doLevelCompares() { $leftOperand = $this->doLevelSumRels(); $equalityOps = [ '==', '===', '!=', '!==', '=' ]; $orderOps = [ '<', '>', '<=', '>=' ]; // Only allow either a single operation, or a combination of a single equalityOps and a single // orderOps. This resembles what PHP does, and allows `a < b == c` while rejecting `a < b < c` $allowedOps = array_merge( $equalityOps, $orderOps ); while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $allowedOps ) ) { $op = $this->mCur->value; $allowedOps = in_array( $op, $equalityOps ) ? array_diff( $allowedOps, $equalityOps ) : array_diff( $allowedOps, $orderOps ); $position = $this->mPos; $this->move(); $rightOperand = $this->doLevelSumRels(); $leftOperand = new AFPTreeNode( AFPTreeNode::COMPARE, [ $op, $leftOperand, $rightOperand ], $position ); } return $leftOperand; } /** * Handle addition and subtraction. * * @return AFPTreeNode */ protected function doLevelSumRels() { $leftOperand = $this->doLevelMulRels(); $ops = [ '+', '-' ]; while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { $op = $this->mCur->value; $position = $this->mPos; $this->move(); $rightOperand = $this->doLevelMulRels(); $leftOperand = new AFPTreeNode( AFPTreeNode::SUM_REL, [ $op, $leftOperand, $rightOperand ], $position ); } return $leftOperand; } /** * Handles multiplication and division. * * @return AFPTreeNode */ protected function doLevelMulRels() { $leftOperand = $this->doLevelPow(); $ops = [ '*', '/', '%' ]; while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { $op = $this->mCur->value; $position = $this->mPos; $this->move(); $rightOperand = $this->doLevelPow(); $leftOperand = new AFPTreeNode( AFPTreeNode::MUL_REL, [ $op, $leftOperand, $rightOperand ], $position ); } return $leftOperand; } /** * Handles exponentiation. * * @return AFPTreeNode */ protected function doLevelPow() { $base = $this->doLevelBoolInvert(); while ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '**' ) { $position = $this->mPos; $this->move(); $exponent = $this->doLevelBoolInvert(); $base = new AFPTreeNode( AFPTreeNode::POW, [ $base, $exponent ], $position ); } return $base; } /** * Handles boolean inversion. * * @return AFPTreeNode */ protected function doLevelBoolInvert() { if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '!' ) { $position = $this->mPos; $this->move(); $argument = $this->doLevelKeywordOperators(); return new AFPTreeNode( AFPTreeNode::BOOL_INVERT, [ $argument ], $position ); } return $this->doLevelKeywordOperators(); } /** * Handles keyword operators. * * @return AFPTreeNode */ protected function doLevelKeywordOperators() { $leftOperand = $this->doLevelUnarys(); $keyword = strtolower( $this->mCur->value ); if ( $this->mCur->type === AFPToken::TKEYWORD && isset( AbuseFilterParser::$mKeywords[$keyword] ) ) { $position = $this->mPos; $this->move(); $rightOperand = $this->doLevelUnarys(); return new AFPTreeNode( AFPTreeNode::KEYWORD_OPERATOR, [ $keyword, $leftOperand, $rightOperand ], $position ); } return $leftOperand; } /** * Handles unary operators. * * @return AFPTreeNode */ protected function doLevelUnarys() { $op = $this->mCur->value; if ( $this->mCur->type === AFPToken::TOP && ( $op === "+" || $op === "-" ) ) { $position = $this->mPos; $this->move(); $argument = $this->doLevelArrayElements(); return new AFPTreeNode( AFPTreeNode::UNARY, [ $op, $argument ], $position ); } return $this->doLevelArrayElements(); } /** * Handles accessing an array element by an offset. * * @return AFPTreeNode * @throws AFPUserVisibleException */ protected function doLevelArrayElements() { $array = $this->doLevelParenthesis(); while ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) { $position = $this->mPos; $index = $this->doLevelSemicolon(); $array = new AFPTreeNode( AFPTreeNode::ARRAY_INDEX, [ $array, $index ], $position ); if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ ']', $this->mCur->type, $this->mCur->value ] ); } $this->move(); } return $array; } /** * Handles parenthesis. * * @return AFPTreeNode * @throws AFPUserVisibleException */ protected function doLevelParenthesis() { if ( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value === '(' ) { $result = $this->doLevelSemicolon(); if ( !( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value === ')' ) ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ ')', $this->mCur->type, $this->mCur->value ] ); } $this->move(); return $result; } return $this->doLevelFunction(); } /** * Handles function calls. * * @return AFPTreeNode * @throws AFPUserVisibleException */ protected function doLevelFunction() { if ( $this->mCur->type === AFPToken::TID && isset( AbuseFilterParser::$mFunctions[$this->mCur->value] ) ) { $func = $this->mCur->value; $position = $this->mPos; $this->move(); if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== '(' ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ '(', $this->mCur->type, $this->mCur->value ] ); } if ( ( $func === 'set' || $func === 'set_var' ) ) { $state = $this->getState(); $this->move(); $next = $this->getNextToken(); if ( $this->mCur->type !== AFPToken::TSTRING || ( $next->type !== AFPToken::TCOMMA && // Let this fail later, when checking parameters count !( $next->type === AFPToken::TBRACE && $next->value === ')' ) ) ) { throw new AFPUserVisibleException( 'variablevariable', $this->mPos, [] ); } else { $this->setState( $state ); } } $args = []; $next = $this->getNextToken(); if ( $next->type !== AFPToken::TBRACE || $next->value !== ')' ) { do { $thisArg = $this->doLevelSemicolon(); if ( $thisArg === null && !$this->functionIsVariadic( $func ) ) { throw new AFPUserVisibleException( 'unexpectedtoken', $this->mPos, [ $this->mCur->type, $this->mCur->value ] ); } elseif ( $thisArg !== null ) { $args[] = $thisArg; } } while ( $this->mCur->type === AFPToken::TCOMMA ); } else { $this->move(); } if ( $this->mCur->type !== AFPToken::TBRACE || $this->mCur->value !== ')' ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ ')', $this->mCur->type, $this->mCur->value ] ); } // Giving too few arguments to a function is a pretty common error. If we check it here // (as well as at runtime, for OCD), we can make checkSyntax only try to build the AST, as // there would be way less runtime errors. Moreover, this check will also be performed inside // skipped branches, e.g. the discarded if/else branch. $this->checkArgCount( $args, $func ); $this->move(); array_unshift( $args, $func ); return new AFPTreeNode( AFPTreeNode::FUNCTION_CALL, $args, $position ); } return $this->doLevelAtom(); } /** * Handle literals. * @return AFPTreeNode * @throws AFPUserVisibleException */ protected function doLevelAtom() { $tok = $this->mCur->value; switch ( $this->mCur->type ) { case AFPToken::TID: case AFPToken::TSTRING: case AFPToken::TFLOAT: case AFPToken::TINT: $result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos ); break; case AFPToken::TKEYWORD: if ( in_array( $tok, [ "true", "false", "null" ] ) ) { $result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos ); break; } throw new AFPUserVisibleException( 'unrecognisedkeyword', $this->mPos, [ $tok ] ); /** @noinspection PhpMissingBreakStatementInspection */ case AFPToken::TSQUAREBRACKET: if ( $this->mCur->value === '[' ) { $array = []; while ( true ) { $this->move(); if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) { break; } $array[] = $this->doLevelSet(); if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) { break; } if ( $this->mCur->type !== AFPToken::TCOMMA ) { throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, [ ', or ]', $this->mCur->type, $this->mCur->value ] ); } } $result = new AFPTreeNode( AFPTreeNode::ARRAY_DEFINITION, $array, $this->mPos ); break; } // Fallthrough expected default: throw new AFPUserVisibleException( 'unexpectedtoken', $this->mPos, [ $this->mCur->type, $this->mCur->value ] ); } $this->move(); return $result; } /** * Check that a built-in function has been provided the right amount of arguments * * @param array $args The arguments supplied to the function * @param string $func The function name * @throws AFPUserVisibleException * @see AbuseFilterParser::checkArgCount() * @todo This is a duplicate of AbuseFilter::checkEnoughArguments, and such duplication * should be avoided when merging the parsers. */ protected function checkArgCount( $args, $func ) { if ( !array_key_exists( $func, AbuseFilterParser::$funcArgCount ) ) { throw new InvalidArgumentException( "$func is not a valid function." ); } list( $min, $max ) = AbuseFilterParser::$funcArgCount[ $func ]; if ( count( $args ) < $min ) { throw new AFPUserVisibleException( $min === 1 ? 'noparams' : 'notenoughargs', $this->mCur->pos, [ $func, $min, count( $args ) ] ); } elseif ( count( $args ) > $max ) { $this->logger->warning( "Too many params to $func for filter: " . ( $this->mFilter ?? 'unavailable' ) ); /* @todo Uncomment after fixing filters in WMF production throw new AFPUserVisibleException( 'toomanyargs', $this->mCur->pos, [ $func, $max, count( $args ) ] ); */ } } /** * @param string $fname * @return bool * @see AbuseFilterParser::functionIsVariadic */ protected function functionIsVariadic( $fname ) { if ( !array_key_exists( $fname, AbuseFilterParser::$funcArgCount ) ) { throw new InvalidArgumentException( "Function $fname is not valid" ); } return AbuseFilterParser::$funcArgCount[$fname][1] === INF; } }