mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-27 15:30:42 +00:00
Merge "Annotate the AST with var names before caching the AST"
This commit is contained in:
commit
b8ad85cac7
|
@ -1,26 +1,21 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* A class representing a whole AST generated by AFPTreeParser, holding AFPTreeNode's and a list
|
||||
* of custom variable names.
|
||||
* A class representing a whole AST generated by AFPTreeParser, holding AFPTreeNode's. This wrapper
|
||||
* could be expanded in the future. For now, it's mostly useful for typehints, and to have an
|
||||
* evalTree function in the CachingParser.
|
||||
*/
|
||||
class AFPSyntaxTree {
|
||||
/**
|
||||
* @var AFPTreeNode|null
|
||||
*/
|
||||
private $rootNode;
|
||||
/**
|
||||
* @var string[]
|
||||
*/
|
||||
private $variableNames;
|
||||
|
||||
/**
|
||||
* @param string[] $variableNames
|
||||
* @param AFPTreeNode|null $root
|
||||
*/
|
||||
public function __construct( array $variableNames, AFPTreeNode $root = null ) {
|
||||
public function __construct( AFPTreeNode $root = null ) {
|
||||
$this->rootNode = $root;
|
||||
$this->variableNames = $variableNames;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -29,11 +24,4 @@ class AFPSyntaxTree {
|
|||
public function getRoot() {
|
||||
return $this->rootNode;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getVariableNames() {
|
||||
return $this->variableNames;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,9 +86,17 @@ class AFPTreeNode {
|
|||
*/
|
||||
public $children;
|
||||
|
||||
// Position used for error reporting.
|
||||
/** @var int Position used for error reporting. */
|
||||
public $position;
|
||||
|
||||
/**
|
||||
* @var string[] Names of the variables assigned in this node or any of its descendants
|
||||
* @todo We could change this to be an instance of a new AFPScope class (holding a var map)
|
||||
* if we'll have the need to store other scope-specific data,
|
||||
* see <https://phabricator.wikimedia.org/T230982#5475400>.
|
||||
*/
|
||||
private $innerAssignments = [];
|
||||
|
||||
/**
|
||||
* @param string $type
|
||||
* @param AFPTreeNode[]|string[]|AFPToken $children
|
||||
|
@ -98,6 +106,54 @@ class AFPTreeNode {
|
|||
$this->type = $type;
|
||||
$this->children = $children;
|
||||
$this->position = $position;
|
||||
$this->populateInnerAssignments();
|
||||
}
|
||||
|
||||
/**
|
||||
* Save in this node all the variable names used in the children, and in this node if it's an
|
||||
* assignment-related node. Note that this doesn't check whether the variable is custom or builtin:
|
||||
* this is already checked when calling setUserVariable.
|
||||
* In case we'll ever need to store other data in a node, or maybe even a Scope object, this could
|
||||
* be moved to a different class which could also re-visit the whole AST.
|
||||
*/
|
||||
private function populateInnerAssignments() {
|
||||
if ( $this->type === self::ATOM ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
$this->type === self::ASSIGNMENT ||
|
||||
$this->type === self::INDEX_ASSIGNMENT ||
|
||||
$this->type === self::ARRAY_APPEND
|
||||
) {
|
||||
$this->innerAssignments = [ $this->children[0] ];
|
||||
} elseif (
|
||||
$this->type === self::FUNCTION_CALL &&
|
||||
in_array( $this->children[0], [ 'set', 'set_var' ] ) &&
|
||||
// If unset, parsing will fail when checking arguments
|
||||
isset( $this->children[1] )
|
||||
) {
|
||||
$varnameNode = $this->children[1];
|
||||
if ( $varnameNode->type !== self::ATOM ) {
|
||||
// Shouldn't happen since variable variables are not allowed
|
||||
throw new AFPException( "Got non-atom type {$varnameNode->type} for set_var" );
|
||||
}
|
||||
$this->innerAssignments = [ $varnameNode->children->value ];
|
||||
}
|
||||
|
||||
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach ATOM excluded above
|
||||
foreach ( $this->children as $child ) {
|
||||
if ( $child instanceof self ) {
|
||||
$this->innerAssignments = array_merge( $this->innerAssignments, $child->getInnerAssignments() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getInnerAssignments() : array {
|
||||
return $this->innerAssignments;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -32,11 +32,6 @@ class AFPTreeParser {
|
|||
|
||||
const CACHE_VERSION = 2;
|
||||
|
||||
/**
|
||||
* @var bool[] Custom variable names, set of [ name => true ]
|
||||
*/
|
||||
private $variablesNames;
|
||||
|
||||
/**
|
||||
* @var BagOStuff Used to cache tokens
|
||||
*/
|
||||
|
@ -70,7 +65,6 @@ class AFPTreeParser {
|
|||
public function resetState() {
|
||||
$this->mTokens = [];
|
||||
$this->mPos = 0;
|
||||
$this->variablesNames = [];
|
||||
$this->mFilter = null;
|
||||
}
|
||||
|
||||
|
@ -132,8 +126,7 @@ class AFPTreeParser {
|
|||
*/
|
||||
public function buildSyntaxTree() : AFPSyntaxTree {
|
||||
$root = $this->doLevelEntry();
|
||||
$variables = array_keys( $this->variablesNames );
|
||||
return new AFPSyntaxTree( $variables, $root );
|
||||
return new AFPSyntaxTree( $root );
|
||||
}
|
||||
|
||||
/* Levels */
|
||||
|
@ -207,6 +200,7 @@ class AFPTreeParser {
|
|||
|
||||
// Speculatively parse the assignment statement assuming it can
|
||||
// potentially be an assignment, but roll back if it isn't.
|
||||
// @todo Use $this->getNextToken for clearer code
|
||||
$initialState = $this->getState();
|
||||
$this->move();
|
||||
|
||||
|
@ -214,7 +208,6 @@ class AFPTreeParser {
|
|||
$position = $this->mPos;
|
||||
$this->move();
|
||||
$value = $this->doLevelSet();
|
||||
$this->variablesNames[ $varname ] = true;
|
||||
|
||||
return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position );
|
||||
}
|
||||
|
@ -240,7 +233,6 @@ class AFPTreeParser {
|
|||
$position = $this->mPos;
|
||||
$this->move();
|
||||
$value = $this->doLevelSet();
|
||||
$this->variablesNames[ $varname ] = true;
|
||||
if ( $index === 'append' ) {
|
||||
return new AFPTreeNode(
|
||||
AFPTreeNode::ARRAY_APPEND, [ $varname, $value ], $position );
|
||||
|
@ -599,7 +591,6 @@ class AFPTreeParser {
|
|||
) {
|
||||
throw new AFPUserVisibleException( 'variablevariable', $this->mPos, [] );
|
||||
} else {
|
||||
$this->variablesNames[ $this->mCur->value ] = true;
|
||||
$this->setState( $state );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
|
|||
*/
|
||||
public function intEval( $code ) : AFPData {
|
||||
$tree = $this->getTree( $code );
|
||||
|
||||
$res = $this->evalTree( $tree );
|
||||
|
||||
if ( $res->getType() === AFPData::DUNDEFINED ) {
|
||||
|
@ -350,49 +351,21 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
|
|||
* or
|
||||
* if ( false ) then ( var := 'foo' ) else ( 1 ) end; var == 2
|
||||
* where `false` is something evaluated as false at runtime.
|
||||
* In the future, we may decide to always throw in those cases (for stability and parser speed),
|
||||
* but that's not good, at least as long as people don't have an on-wiki way to see if filters
|
||||
* are failing at runtime.
|
||||
* @todo Decide about that.
|
||||
*
|
||||
* @note This method doesn't check whether the variable exists in case of index assignments.
|
||||
* Hence, in `false & (nonexistent[] := 2)`, `nonexistent` would be hoisted without errors.
|
||||
* However, that would by caught by checkSyntax, so we can avoid checking here: we'd need
|
||||
* way more context than we currently have.
|
||||
*
|
||||
* @param AFPTreeNode $node
|
||||
*/
|
||||
private function discardWithHoisting( AFPTreeNode $node ) {
|
||||
if (
|
||||
$node->type === AFPTreeNode::ASSIGNMENT &&
|
||||
!$this->mVariables->varIsSet( $node->children[0] )
|
||||
) {
|
||||
$this->setUserVariable( $node->children[0], new AFPData( AFPData::DUNDEFINED ) );
|
||||
} elseif (
|
||||
$node->type === AFPTreeNode::INDEX_ASSIGNMENT ||
|
||||
$node->type === AFPTreeNode::ARRAY_APPEND
|
||||
) {
|
||||
$varName = $node->children[0];
|
||||
if ( !$this->mVariables->varIsSet( $varName ) ) {
|
||||
throw new AFPUserVisibleException( 'unrecognisedvar', $node->position, [ $varName ] );
|
||||
}
|
||||
$this->setUserVariable( $varName, new AFPData( AFPData::DUNDEFINED ) );
|
||||
} elseif (
|
||||
$node->type === AFPTreeNode::FUNCTION_CALL &&
|
||||
in_array( $node->children[0], [ 'set', 'set_var' ] ) &&
|
||||
isset( $node->children[1] )
|
||||
) {
|
||||
$varnameNode = $node->children[1];
|
||||
if ( $varnameNode->type !== AFPTreeNode::ATOM ) {
|
||||
// Shouldn't happen since variable variables are not allowed
|
||||
throw new AFPException( "Got non-atom type {$varnameNode->type} for set_var" );
|
||||
}
|
||||
$varname = $varnameNode->children->value;
|
||||
if ( !$this->mVariables->varIsSet( $varname ) ) {
|
||||
$this->setUserVariable( $varname, new AFPData( AFPData::DUNDEFINED ) );
|
||||
}
|
||||
} elseif ( $node->type === AFPTreeNode::ATOM ) {
|
||||
return;
|
||||
}
|
||||
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach ATOM case excluded above
|
||||
foreach ( $node->children as $child ) {
|
||||
if ( $child instanceof AFPTreeNode ) {
|
||||
$this->discardWithHoisting( $child );
|
||||
foreach ( $node->getInnerAssignments() as $name ) {
|
||||
if (
|
||||
!$this->mVariables->varIsSet( $name ) ||
|
||||
$this->mVariables->getVar( $name )->getType() === AFPData::DARRAY
|
||||
) {
|
||||
$this->setUserVariable( $name, new AFPData( AFPData::DUNDEFINED ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -280,8 +280,6 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
|
|||
[ 'a[1] := 5', 'getVarValue' ],
|
||||
[ 'a[] := 5', 'getVarValue' ],
|
||||
[ 'a = 5', 'getVarValue' ],
|
||||
[ 'false & ( nonexistent[1] := 2 )', 'skipOverBraces/discardWithHoisting' ],
|
||||
[ 'false & ( nonexistent[] := 2 )', 'skipOverBraces/discardWithHoisting' ],
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -461,6 +459,8 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
|
|||
[ 'added_lines[] := 1', 'doLevelSet' ],
|
||||
[ 'added_lines[3] := 1', 'doLevelSet' ],
|
||||
[ 'page_id[3] := 1', 'doLevelSet' ],
|
||||
[ 'true | (added_lines := 1);','setUserVariable' ],
|
||||
[ 'if(true) then 1 else (added_lines := 1) end;', 'setUserVariable' ],
|
||||
];
|
||||
}
|
||||
|
||||
|
@ -839,9 +839,11 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
|
|||
public function provideVarDeclarationInSkippedBlock() {
|
||||
return [
|
||||
[ "x := [5]; false & (1 == 1; y := 'b'; x[1] := 'x'; 3 < 4); y != 'b' & x[1] != 'x'" ],
|
||||
[ "(var := [1]); false & ( var[] := 'baz' ); var contains 'baz'" ],
|
||||
[ "(var := [1]); false & ( var[] := 'baz' ); count(var) > -1" ],
|
||||
[ "(var := [1]); false & ( var[1] := 'baz' ); var[1] === 'baz'" ],
|
||||
[ "false & (set('myvar', 1)); myvar contains 1" ],
|
||||
[ "false & ( ( false & ( var := [1] ) ) | ( var[] := 2 ) ); var" ],
|
||||
[ "false & ( ( false & ( var := [1] ); true ) | ( var[] := 2 ) ); var" ],
|
||||
// The following tests are to ensure that we don't get a match
|
||||
[ "false & ( var := 'foo'; x := get_matches( var, added_lines )[1] ); x != false" ],
|
||||
[ "false & ( var := 'foo'); var !== null" ],
|
||||
|
|
Loading…
Reference in a new issue