treeRoot = $tree->getRoot(); $this->keywordsManager = $keywordsManager; $this->mode = $mode; $this->checkUnusedVars = $checkUnusedVars; } /** * Start the static analysis * * @throws UserVisibleException */ public function start(): void { if ( !$this->treeRoot ) { return; } $bound = $this->check( $this->desugar( $this->treeRoot ), [] ); $unused = array_keys( array_filter( $bound, static function ( $v ) { return !$v; } ) ); if ( $this->checkUnusedVars && $unused ) { throw new UserVisibleException( 'unusedvars', self::DUMMYPOS, [ Message::listParam( $unused, 'comma' ) ] ); } } /** * Remove syntactic sugar so that we don't need to deal with * too many cases. * * This could benefit the evaluator as well, but for now, this is * only used for static analysis. * * Postcondition: * - The tree will not contain nodes of * type ASSIGNMENT, LOGIC, COMPARE, SUM_REL, MUL_REL, POW, * KEYWORD_OPERATOR, and ARRAY_INDEX * - The tree may additionally contain a node of type BINOP. * - The tree should not have set_var function application. * - Conditionals will have both branches. * * @param AFPTreeNode $node * @return AFPTreeNode * @throws InternalException */ private function desugar( AFPTreeNode $node ): AFPTreeNode { switch ( $node->type ) { case AFPTreeNode::ATOM: return $node; case AFPTreeNode::FUNCTION_CALL: if ( $node->children[0] === 'set_var' ) { $node->children[0] = 'set'; } return $this->newNodeMapExceptFirst( $node ); case AFPTreeNode::ARRAY_INDEX: return $this->newNodeNamedBinop( $node, '[]' ); case AFPTreeNode::POW: return $this->newNodeNamedBinop( $node, '**' ); case AFPTreeNode::UNARY: case AFPTreeNode::INDEX_ASSIGNMENT: case AFPTreeNode::ARRAY_APPEND: return $this->newNodeMapExceptFirst( $node ); case AFPTreeNode::BOOL_INVERT: /* * @todo this should really be combined with UNARY, * but let's wait to change the meaning of UNARY across * the codebase together */ return $this->newNodeMapAll( $node ); case AFPTreeNode::KEYWORD_OPERATOR: case AFPTreeNode::MUL_REL: case AFPTreeNode::SUM_REL: case AFPTreeNode::COMPARE: return $this->newNodeBinop( $node ); case AFPTreeNode::LOGIC: $result = $this->newNodeBinop( $node ); list( $op, $left, $right ) = $result->children; if ( $op === '&' || $op === '|' ) { return $this->desugarAndOr( $op, $left, $right, $node->position ); } else { return $result; } case AFPTreeNode::ARRAY_DEFINITION: case AFPTreeNode::SEMICOLON: return $this->newNodeMapAll( $node ); case AFPTreeNode::CONDITIONAL: if ( $node->children[2] === null ) { $node->children[2] = new AFPTreeNode( AFPTreeNode::ATOM, new AFPToken( AFPToken::TKEYWORD, "null", $node->position ), $node->position ); } return $this->newNodeMapAll( $node ); case AFPTreeNode::ASSIGNMENT: list( $varname, $value ) = $node->children; return new AFPTreeNode( AFPTreeNode::FUNCTION_CALL, [ "set", new AFPTreeNode( AFPTreeNode::ATOM, new AFPToken( AFPToken::TSTRING, $varname, $node->position ), $node->position ), $this->desugar( $value ) ], $node->position ); default: // @codeCoverageIgnoreStart throw new InternalException( "Unknown node type passed: {$node->type}" ); // @codeCoverageIgnoreEnd } } /** * @param string $op * @param AFPTreeNode $left * @param AFPTreeNode $right * @param int $position * @return AFPTreeNode */ private function desugarAndOr( string $op, AFPTreeNode $left, AFPTreeNode $right, int $position ): AFPTreeNode { $trueNode = new AFPTreeNode( AFPTreeNode::ATOM, new AFPToken( AFPToken::TKEYWORD, "true", $position ), $position ); $falseNode = new AFPTreeNode( AFPTreeNode::ATOM, new AFPToken( AFPToken::TKEYWORD, "false", $position ), $position ); $conditionalNode = new AFPTreeNode( AFPTreeNode::CONDITIONAL, [ $right, $trueNode, $falseNode ], $position ); if ( $op === '&' ) { // & is supposed to be equivalent to // if then (if then true else false) else false end // See T237336 for why this is currently not the case. return new AFPTreeNode( AFPTreeNode::CONDITIONAL, [ $left, $conditionalNode, $falseNode ], $position ); } elseif ( $op === '|' ) { // | is supposed to be equivalent to // if then true else (if then true else false) end // See T237336 for why this is currently not the case. return new AFPTreeNode( AFPTreeNode::CONDITIONAL, [ $left, $trueNode, $conditionalNode ], $position ); } else { // @codeCoverageIgnoreStart throw new InternalException( "Unknown operator: {$op}" ); // @codeCoverageIgnoreEnd } } /** * Construct a new node with information based on the old node but * with different children * * @param AFPTreeNode $node * @param AFPTreeNode[]|string[]|AFPToken $children * @return AFPTreeNode */ private function newNode( AFPTreeNode $node, $children ): AFPTreeNode { return new AFPTreeNode( $node->type, $children, $node->position ); } /** * Construct a new node with information based on the old node but * with different type * * @param AFPTreeNode $node * @param string $type * @return AFPTreeNode */ private function newNodeReplaceType( AFPTreeNode $node, string $type ): AFPTreeNode { return new AFPTreeNode( $type, $node->children, $node->position ); } /** * Recursively desugar on all children * * @param AFPTreeNode $node * @return AFPTreeNode */ private function newNodeMapAll( AFPTreeNode $node ): AFPTreeNode { $children = $node->children; if ( !is_array( $children ) ) { // @codeCoverageIgnoreStart throw new LogicException( "Unexpected non-array children of an AFPTreeNode of type " . "{$node->type} at position {$node->position}" ); // @codeCoverageIgnoreEnd } return $this->newNode( $node, array_map( [ $this, 'desugar' ], $children ) ); } /** * Recursively desugar on all children except the first one * * @param AFPTreeNode $node * @return AFPTreeNode */ private function newNodeMapExceptFirst( AFPTreeNode $node ): AFPTreeNode { $items = [ $node->children[0] ]; $args = array_slice( $node->children, 1 ); foreach ( $args as $el ) { $items[] = $this->desugar( $el ); } return $this->newNode( $node, $items ); } /** * Convert a node with an operation into a BINOP * * @param AFPTreeNode $node * @return AFPTreeNode */ private function newNodeBinop( AFPTreeNode $node ): AFPTreeNode { return $this->newNodeReplaceType( $this->newNodeMapExceptFirst( $node ), AFPTreeNode::BINOP ); } /** * Convert a node without an operation into a BINOP with the specified operation * * @param AFPTreeNode $node * @param string $op * @return AFPTreeNode */ private function newNodeNamedBinop( AFPTreeNode $node, string $op ): AFPTreeNode { $items = $this->newNodeMapAll( $node )->children; array_unshift( $items, $op ); return $this->newNodeReplaceType( $this->newNode( $node, $items ), AFPTreeNode::BINOP ); } /** * - Statically compute what are bound after evaluating $node, * provided that variables in $bound are already bound. * - Similarly compute for each bound variable after evaluating $node * whether it is used provided that we already have $bound * that contains necessary information. * - Ensure function application's validity. * - Ensure that the first argument of set is a literal string. * - Ensure that all assignment is not done on built-in identifier. * * Precondition: * - The tree $node should be desugared and normalized. * * Postcondition: * - $node is guaranteed to have no unbound variables * provided that variables in $bound are already bound * (for the definition of unbound variable indicated by $this->mode) * - All function applications should be valid and have correct arity. * - The set function application's first argument should be * a literal string. * * @param AFPTreeNode $node * @param bool[] $bound Map of [ variable_name => used ] * @return bool[] Map of [ variable_name => used ] * @throws UserVisibleException * @throws InternalException */ private function check( AFPTreeNode $node, array $bound ): array { switch ( $node->type ) { // phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment case AFPTreeNode::ATOM: $tok = $node->children; switch ( $tok->type ) { case AFPToken::TID: return $this->lookupVar( $tok->value, $tok->pos, $bound ); case AFPToken::TSTRING: case AFPToken::TFLOAT: case AFPToken::TINT: case AFPToken::TKEYWORD: return $bound; default: // @codeCoverageIgnoreStart throw new InternalException( "Unknown token {$tok->type} provided in the ATOM node" ); // @codeCoverageIgnoreEnd } case AFPTreeNode::ARRAY_DEFINITION: // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here foreach ( $node->children as $el ) { $bound = $this->check( $el, $bound ); } return $bound; case AFPTreeNode::FUNCTION_CALL: $fname = $node->children[0]; $args = array_slice( $node->children, 1 ); if ( !array_key_exists( $fname, FilterEvaluator::FUNCTIONS ) ) { throw new UserVisibleException( 'unknownfunction', $node->position, [ $fname ] ); } $this->checkArgCount( $args, $fname, $node->position ); if ( $fname === 'set' ) { // arity is checked, so we know $args[0] and $args[1] exist $tok = $args[0]->children; if ( !( $tok instanceof AFPToken ) || $tok->type !== AFPToken::TSTRING ) { throw new UserVisibleException( 'variablevariable', $node->position, [] ); } $bound = $this->check( $args[1], $bound ); // set the variable as unused return $this->assignVar( $tok->value, $tok->pos, $bound ); } else { foreach ( $args as $arg ) { $bound = $this->check( $arg, $bound ); } return $bound; } case AFPTreeNode::BINOP: list( , $left, $right ) = $node->children; return $this->check( $right, $this->check( $left, $bound ) ); case AFPTreeNode::UNARY: list( , $argument ) = $node->children; return $this->check( $argument, $bound ); case AFPTreeNode::BOOL_INVERT: list( $argument ) = $node->children; return $this->check( $argument, $bound ); // phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment case AFPTreeNode::CONDITIONAL: list( $condition, $exprIfTrue, $exprIfFalse ) = $node->children; $bound = $this->check( $condition, $bound ); $boundLeft = $this->check( $exprIfTrue, $bound ); $boundRight = $this->check( $exprIfFalse, $bound ); switch ( $this->mode ) { case self::MCONSERVATIVE: return $this->mapUnion( $boundLeft, $boundRight ); case self::MLIBERAL: return $this->mapIntersect( $boundLeft, $boundRight ); default: // @codeCoverageIgnoreStart throw new LogicException( "Unknown mode: {$this->mode}" ); // @codeCoverageIgnoreEnd } case AFPTreeNode::INDEX_ASSIGNMENT: list( $varName, $offset, $value ) = $node->children; // deal with unbound $varName $bound = $this->lookupVar( $varName, $node->position, $bound ); $bound = $this->check( $offset, $bound ); $bound = $this->check( $value, $bound ); // deal with built-in $varName and set $varName as unused return $this->assignVar( $varName, $node->position, $bound ); case AFPTreeNode::ARRAY_APPEND: list( $varName, $value ) = $node->children; // deal with unbound $varName $bound = $this->lookupVar( $varName, $node->position, $bound ); $bound = $this->check( $value, $bound ); // deal with built-in $varName and set $varName as unused return $this->assignVar( $varName, $node->position, $bound ); case AFPTreeNode::SEMICOLON: // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here foreach ( $node->children as $statement ) { $bound = $this->check( $statement, $bound ); } return $bound; default: // @codeCoverageIgnoreStart throw new LogicException( "Unknown type: {$node->type}" ); // @codeCoverageIgnoreEnd } } /** * @param array $left * @param array $right * @return array */ private function mapUnion( array $left, array $right ): array { foreach ( $right as $key => $val ) { if ( array_key_exists( $key, $left ) ) { $left[ $key ] = $left[ $key ] || $val; } else { $left[ $key ] = $val; } } return $left; } /** * @param array $left * @param array $right * @return array */ private function mapIntersect( array $left, array $right ): array { $keys = array_intersect_key( $left, $right ); $result = []; foreach ( $keys as $key => $val ) { $result[ $key ] = $left[ $key ] || $right[ $key ]; } return $result; } /** * @param string $var * @param int $pos * @param array $bound * @return array */ private function assignVar( string $var, int $pos, array $bound ): array { $var = strtolower( $var ); if ( $this->isReservedIdentifier( $var ) ) { throw new UserVisibleException( 'overridebuiltin', $pos, [ $var ] ); } $bound[ $var ] = false; return $bound; } /** * @param string $var * @param int $pos * @param array $bound * @return array */ private function lookupVar( string $var, int $pos, array $bound ): array { $var = strtolower( $var ); if ( array_key_exists( $var, $bound ) ) { // user-defined variable $bound[ $var ] = true; return $bound; } elseif ( $this->keywordsManager->isVarDisabled( $var ) ) { // disabled built-in variables throw new UserVisibleException( 'disabledvar', $pos, [ $var ] ); } elseif ( $this->keywordsManager->varExists( $var ) ) { // non-disabled built-in variables return $bound; } elseif ( $this->isReservedIdentifier( $var ) ) { // other built-in identifiers throw new UserVisibleException( 'usebuiltin', $pos, [ $var ] ); } else { // unbound variables throw new UserVisibleException( 'unrecognisedvar', $pos, [ $var ] ); } } /** * Check that a built-in function has been provided the right amount of arguments * * @param array $args The arguments supplied to the function * @param string $func The function name * @param int $position * @throws UserVisibleException */ protected function checkArgCount( array $args, string $func, int $position ): void { if ( !array_key_exists( $func, FilterEvaluator::FUNC_ARG_COUNT ) ) { // @codeCoverageIgnoreStart throw new InvalidArgumentException( "$func is not a valid function." ); // @codeCoverageIgnoreEnd } list( $min, $max ) = FilterEvaluator::FUNC_ARG_COUNT[ $func ]; if ( count( $args ) < $min ) { throw new UserVisibleException( $min === 1 ? 'noparams' : 'notenoughargs', $position, [ $func, $min, count( $args ) ] ); } elseif ( count( $args ) > $max ) { throw new UserVisibleException( 'toomanyargs', $position, [ $func, $max, count( $args ) ] ); } } /** * Check whether the given name is a reserved identifier, e.g. the name of a built-in variable, * function, or keyword. * * @param string $name * @return bool */ protected function isReservedIdentifier( string $name ): bool { return $this->keywordsManager->varExists( $name ) || array_key_exists( $name, FilterEvaluator::FUNCTIONS ) || // We need to check for true, false, if/then/else etc. because, even if they have a different // AFPToken type, they may be used inside set/set_var() in_array( $name, AbuseFilterTokenizer::KEYWORDS, true ); } }