Merge "Annotate the AST with var names before caching the AST"

This commit is contained in:
jenkins-bot 2019-09-14 01:03:53 +00:00 committed by Gerrit Code Review
commit b8ad85cac7
5 changed files with 80 additions and 70 deletions

View file

@ -1,26 +1,21 @@
<?php
/**
* A class representing a whole AST generated by AFPTreeParser, holding AFPTreeNode's and a list
* of custom variable names.
* A class representing a whole AST generated by AFPTreeParser, holding AFPTreeNode's. This wrapper
* could be expanded in the future. For now, it's mostly useful for typehints, and to have an
* evalTree function in the CachingParser.
*/
class AFPSyntaxTree {
/**
* @var AFPTreeNode|null
*/
private $rootNode;
/**
* @var string[]
*/
private $variableNames;
/**
* @param string[] $variableNames
* @param AFPTreeNode|null $root
*/
public function __construct( array $variableNames, AFPTreeNode $root = null ) {
public function __construct( AFPTreeNode $root = null ) {
$this->rootNode = $root;
$this->variableNames = $variableNames;
}
/**
@ -29,11 +24,4 @@ class AFPSyntaxTree {
public function getRoot() {
return $this->rootNode;
}
/**
* @return string[]
*/
public function getVariableNames() {
return $this->variableNames;
}
}

View file

@ -86,9 +86,17 @@ class AFPTreeNode {
*/
public $children;
// Position used for error reporting.
/** @var int Position used for error reporting. */
public $position;
/**
* @var string[] Names of the variables assigned in this node or any of its descendants
* @todo We could change this to be an instance of a new AFPScope class (holding a var map)
* if we'll have the need to store other scope-specific data,
* see <https://phabricator.wikimedia.org/T230982#5475400>.
*/
private $innerAssignments = [];
/**
* @param string $type
* @param AFPTreeNode[]|string[]|AFPToken $children
@ -98,6 +106,54 @@ class AFPTreeNode {
$this->type = $type;
$this->children = $children;
$this->position = $position;
$this->populateInnerAssignments();
}
/**
* Save in this node all the variable names used in the children, and in this node if it's an
* assignment-related node. Note that this doesn't check whether the variable is custom or builtin:
* this is already checked when calling setUserVariable.
* In case we'll ever need to store other data in a node, or maybe even a Scope object, this could
* be moved to a different class which could also re-visit the whole AST.
*/
private function populateInnerAssignments() {
if ( $this->type === self::ATOM ) {
return;
}
if (
$this->type === self::ASSIGNMENT ||
$this->type === self::INDEX_ASSIGNMENT ||
$this->type === self::ARRAY_APPEND
) {
$this->innerAssignments = [ $this->children[0] ];
} elseif (
$this->type === self::FUNCTION_CALL &&
in_array( $this->children[0], [ 'set', 'set_var' ] ) &&
// If unset, parsing will fail when checking arguments
isset( $this->children[1] )
) {
$varnameNode = $this->children[1];
if ( $varnameNode->type !== self::ATOM ) {
// Shouldn't happen since variable variables are not allowed
throw new AFPException( "Got non-atom type {$varnameNode->type} for set_var" );
}
$this->innerAssignments = [ $varnameNode->children->value ];
}
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach ATOM excluded above
foreach ( $this->children as $child ) {
if ( $child instanceof self ) {
$this->innerAssignments = array_merge( $this->innerAssignments, $child->getInnerAssignments() );
}
}
}
/**
* @return string[]
*/
public function getInnerAssignments() : array {
return $this->innerAssignments;
}
/**

View file

@ -32,11 +32,6 @@ class AFPTreeParser {
const CACHE_VERSION = 2;
/**
* @var bool[] Custom variable names, set of [ name => true ]
*/
private $variablesNames;
/**
* @var BagOStuff Used to cache tokens
*/
@ -70,7 +65,6 @@ class AFPTreeParser {
public function resetState() {
$this->mTokens = [];
$this->mPos = 0;
$this->variablesNames = [];
$this->mFilter = null;
}
@ -132,8 +126,7 @@ class AFPTreeParser {
*/
public function buildSyntaxTree() : AFPSyntaxTree {
$root = $this->doLevelEntry();
$variables = array_keys( $this->variablesNames );
return new AFPSyntaxTree( $variables, $root );
return new AFPSyntaxTree( $root );
}
/* Levels */
@ -207,6 +200,7 @@ class AFPTreeParser {
// Speculatively parse the assignment statement assuming it can
// potentially be an assignment, but roll back if it isn't.
// @todo Use $this->getNextToken for clearer code
$initialState = $this->getState();
$this->move();
@ -214,7 +208,6 @@ class AFPTreeParser {
$position = $this->mPos;
$this->move();
$value = $this->doLevelSet();
$this->variablesNames[ $varname ] = true;
return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position );
}
@ -240,7 +233,6 @@ class AFPTreeParser {
$position = $this->mPos;
$this->move();
$value = $this->doLevelSet();
$this->variablesNames[ $varname ] = true;
if ( $index === 'append' ) {
return new AFPTreeNode(
AFPTreeNode::ARRAY_APPEND, [ $varname, $value ], $position );
@ -599,7 +591,6 @@ class AFPTreeParser {
) {
throw new AFPUserVisibleException( 'variablevariable', $this->mPos, [] );
} else {
$this->variablesNames[ $this->mCur->value ] = true;
$this->setState( $state );
}
}

View file

@ -54,6 +54,7 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
*/
public function intEval( $code ) : AFPData {
$tree = $this->getTree( $code );
$res = $this->evalTree( $tree );
if ( $res->getType() === AFPData::DUNDEFINED ) {
@ -350,49 +351,21 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
* or
* if ( false ) then ( var := 'foo' ) else ( 1 ) end; var == 2
* where `false` is something evaluated as false at runtime.
* In the future, we may decide to always throw in those cases (for stability and parser speed),
* but that's not good, at least as long as people don't have an on-wiki way to see if filters
* are failing at runtime.
* @todo Decide about that.
*
* @note This method doesn't check whether the variable exists in case of index assignments.
* Hence, in `false & (nonexistent[] := 2)`, `nonexistent` would be hoisted without errors.
* However, that would by caught by checkSyntax, so we can avoid checking here: we'd need
* way more context than we currently have.
*
* @param AFPTreeNode $node
*/
private function discardWithHoisting( AFPTreeNode $node ) {
if (
$node->type === AFPTreeNode::ASSIGNMENT &&
!$this->mVariables->varIsSet( $node->children[0] )
) {
$this->setUserVariable( $node->children[0], new AFPData( AFPData::DUNDEFINED ) );
} elseif (
$node->type === AFPTreeNode::INDEX_ASSIGNMENT ||
$node->type === AFPTreeNode::ARRAY_APPEND
) {
$varName = $node->children[0];
if ( !$this->mVariables->varIsSet( $varName ) ) {
throw new AFPUserVisibleException( 'unrecognisedvar', $node->position, [ $varName ] );
}
$this->setUserVariable( $varName, new AFPData( AFPData::DUNDEFINED ) );
} elseif (
$node->type === AFPTreeNode::FUNCTION_CALL &&
in_array( $node->children[0], [ 'set', 'set_var' ] ) &&
isset( $node->children[1] )
) {
$varnameNode = $node->children[1];
if ( $varnameNode->type !== AFPTreeNode::ATOM ) {
// Shouldn't happen since variable variables are not allowed
throw new AFPException( "Got non-atom type {$varnameNode->type} for set_var" );
}
$varname = $varnameNode->children->value;
if ( !$this->mVariables->varIsSet( $varname ) ) {
$this->setUserVariable( $varname, new AFPData( AFPData::DUNDEFINED ) );
}
} elseif ( $node->type === AFPTreeNode::ATOM ) {
return;
}
// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach ATOM case excluded above
foreach ( $node->children as $child ) {
if ( $child instanceof AFPTreeNode ) {
$this->discardWithHoisting( $child );
foreach ( $node->getInnerAssignments() as $name ) {
if (
!$this->mVariables->varIsSet( $name ) ||
$this->mVariables->getVar( $name )->getType() === AFPData::DARRAY
) {
$this->setUserVariable( $name, new AFPData( AFPData::DUNDEFINED ) );
}
}
}

View file

@ -280,8 +280,6 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
[ 'a[1] := 5', 'getVarValue' ],
[ 'a[] := 5', 'getVarValue' ],
[ 'a = 5', 'getVarValue' ],
[ 'false & ( nonexistent[1] := 2 )', 'skipOverBraces/discardWithHoisting' ],
[ 'false & ( nonexistent[] := 2 )', 'skipOverBraces/discardWithHoisting' ],
];
}
@ -461,6 +459,8 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
[ 'added_lines[] := 1', 'doLevelSet' ],
[ 'added_lines[3] := 1', 'doLevelSet' ],
[ 'page_id[3] := 1', 'doLevelSet' ],
[ 'true | (added_lines := 1);','setUserVariable' ],
[ 'if(true) then 1 else (added_lines := 1) end;', 'setUserVariable' ],
];
}
@ -839,9 +839,11 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
public function provideVarDeclarationInSkippedBlock() {
return [
[ "x := [5]; false & (1 == 1; y := 'b'; x[1] := 'x'; 3 < 4); y != 'b' & x[1] != 'x'" ],
[ "(var := [1]); false & ( var[] := 'baz' ); var contains 'baz'" ],
[ "(var := [1]); false & ( var[] := 'baz' ); count(var) > -1" ],
[ "(var := [1]); false & ( var[1] := 'baz' ); var[1] === 'baz'" ],
[ "false & (set('myvar', 1)); myvar contains 1" ],
[ "false & ( ( false & ( var := [1] ) ) | ( var[] := 2 ) ); var" ],
[ "false & ( ( false & ( var := [1] ); true ) | ( var[] := 2 ) ); var" ],
// The following tests are to ensure that we don't get a match
[ "false & ( var := 'foo'; x := get_matches( var, added_lines )[1] ); x != false" ],
[ "false & ( var := 'foo'); var !== null" ],