Merge "Add a base class for parser transition"

This commit is contained in:
jenkins-bot 2019-12-15 19:09:08 +00:00 committed by Gerrit Code Review
commit 62f7b574f1
6 changed files with 156 additions and 191 deletions

View file

@ -159,6 +159,7 @@
"AFPException": "includes/parser/AFPException.php",
"AFPParserState": "includes/parser/AFPParserState.php",
"AFPToken": "includes/parser/AFPToken.php",
"AFPTransitionBase": "includes/parser/AFPTransitionBase.php",
"AFPTreeNode": "includes/parser/AFPTreeNode.php",
"AFPSyntaxTree": "includes/parser/AFPSyntaxTree.php",
"AFPTreeParser": "includes/parser/AFPTreeParser.php",

View file

@ -0,0 +1,151 @@
<?php
/**
 * Base parse-related class to be used while the old parser is being phased out.
 *
 * It holds the table of built-in filter functions, the table of their allowed
 * argument counts, and the helpers that consult those tables, so that the
 * parsers extending this class do not each carry their own copy.
 *
 * @internal This is a temporary class until things are settled down
 */
abstract class AFPTransitionBase {
	/**
	 * Built-in function names as written in filter code, each mapped to a handler name.
	 * NOTE(review): the values look like method names implemented by the concrete
	 * parser; nothing in this class calls them, so confirm against the subclasses.
	 *
	 * @var string[]
	 */
	public const FUNCTIONS = [
		'lcase' => 'funcLc',
		'ucase' => 'funcUc',
		'length' => 'funcLen',
		'string' => 'castString',
		'int' => 'castInt',
		'float' => 'castFloat',
		'bool' => 'castBool',
		'norm' => 'funcNorm',
		'ccnorm' => 'funcCCNorm',
		'ccnorm_contains_any' => 'funcCCNormContainsAny',
		'ccnorm_contains_all' => 'funcCCNormContainsAll',
		'specialratio' => 'funcSpecialRatio',
		'rmspecials' => 'funcRMSpecials',
		'rmdoubles' => 'funcRMDoubles',
		'rmwhitespace' => 'funcRMWhitespace',
		'count' => 'funcCount',
		'rcount' => 'funcRCount',
		'get_matches' => 'funcGetMatches',
		'ip_in_range' => 'funcIPInRange',
		'contains_any' => 'funcContainsAny',
		'contains_all' => 'funcContainsAll',
		'equals_to_any' => 'funcEqualsToAny',
		'substr' => 'funcSubstr',
		'strlen' => 'funcLen',
		'strpos' => 'funcStrPos',
		'str_replace' => 'funcStrReplace',
		'rescape' => 'funcStrRegexEscape',
		'set' => 'funcSetVar',
		'set_var' => 'funcSetVar',
		'sanitize' => 'funcSanitize',
	];

	/**
	 * The minimum and maximum amount of arguments required by each function,
	 * as [ min, max ] pairs keyed by function name.
	 *
	 * The maximum is INF (a float) for functions that accept any number of
	 * arguments, hence the element type is not purely int.
	 *
	 * @var (int|float)[][]
	 */
	public const FUNC_ARG_COUNT = [
		'lcase' => [ 1, 1 ],
		'ucase' => [ 1, 1 ],
		'length' => [ 1, 1 ],
		'string' => [ 1, 1 ],
		'int' => [ 1, 1 ],
		'float' => [ 1, 1 ],
		'bool' => [ 1, 1 ],
		'norm' => [ 1, 1 ],
		'ccnorm' => [ 1, 1 ],
		'ccnorm_contains_any' => [ 2, INF ],
		'ccnorm_contains_all' => [ 2, INF ],
		'specialratio' => [ 1, 1 ],
		'rmspecials' => [ 1, 1 ],
		'rmdoubles' => [ 1, 1 ],
		'rmwhitespace' => [ 1, 1 ],
		'count' => [ 1, 2 ],
		'rcount' => [ 1, 2 ],
		'get_matches' => [ 2, 2 ],
		'ip_in_range' => [ 2, 2 ],
		'contains_any' => [ 2, INF ],
		'contains_all' => [ 2, INF ],
		'equals_to_any' => [ 2, INF ],
		'substr' => [ 2, 3 ],
		'strlen' => [ 1, 1 ],
		'strpos' => [ 2, 3 ],
		'str_replace' => [ 3, 3 ],
		'rescape' => [ 1, 1 ],
		'set' => [ 2, 2 ],
		'set_var' => [ 2, 2 ],
		'sanitize' => [ 1, 1 ],
	];

	/**
	 * @var int The position of the current token. It is passed to
	 *  AFPUserVisibleException when an argument-count error is raised.
	 */
	protected $mPos;

	/**
	 * Check that a built-in function has been provided the right amount of arguments
	 *
	 * Returns normally (without a value) when the amount is within the allowed range.
	 *
	 * @param array $args The arguments supplied to the function
	 * @param string $func The function name
	 * @throws InvalidArgumentException If $func is not listed in FUNC_ARG_COUNT
	 * @throws AFPUserVisibleException If too few or too many arguments were supplied
	 */
	protected function checkArgCount( $args, $func ) {
		if ( !array_key_exists( $func, self::FUNC_ARG_COUNT ) ) {
			throw new InvalidArgumentException( "$func is not a valid function." );
		}

		[ $min, $max ] = self::FUNC_ARG_COUNT[ $func ];
		// Count once; the value is needed both for the comparisons and for the error parameters.
		$given = count( $args );

		if ( $given < $min ) {
			throw new AFPUserVisibleException(
				// A function needing exactly one argument gets a dedicated error key.
				$min === 1 ? 'noparams' : 'notenoughargs',
				$this->mPos,
				[ $func, $min, $given ]
			);
		}

		// For variadic functions $max is INF, so this comparison can never be true.
		if ( $given > $max ) {
			throw new AFPUserVisibleException(
				'toomanyargs',
				$this->mPos,
				[ $func, $max, $given ]
			);
		}
	}

	/**
	 * Check whether the given name is a reserved identifier, e.g. the name of a built-in variable,
	 * function, or keyword.
	 *
	 * @param string $name
	 * @return bool
	 */
	protected function isReservedIdentifier( $name ) {
		return $this->isBuiltinVar( $name ) ||
			array_key_exists( $name, self::FUNCTIONS ) ||
			// We need to check for true, false, if/then/else etc. because, even if they have a different
			// AFPToken type, they may be used inside set/set_var()
			in_array( $name, AbuseFilterTokenizer::KEYWORDS, true );
	}

	/**
	 * Check whether the given name refers to a built-in variable, including
	 * deprecated and disabled variables.
	 *
	 * @param string $varname
	 * @return bool
	 */
	protected function isBuiltinVar( $varname ) {
		$builderValues = AbuseFilter::getBuilderValues();
		$deprecatedVars = AbuseFilter::getDeprecatedVariables();

		return array_key_exists( $varname, $builderValues['vars'] ) ||
			array_key_exists( $varname, AbuseFilter::DISABLED_VARS ) ||
			array_key_exists( $varname, $deprecatedVars );
	}

	/**
	 * Check whether a built-in function accepts an unbounded amount of arguments,
	 * i.e. whether its maximum in FUNC_ARG_COUNT is INF.
	 *
	 * @param string $fname The function name
	 * @return bool
	 * @throws InvalidArgumentException If $fname is not listed in FUNC_ARG_COUNT
	 */
	protected function functionIsVariadic( $fname ) {
		if ( !array_key_exists( $fname, self::FUNC_ARG_COUNT ) ) {
			throw new InvalidArgumentException( "Function $fname is not valid" );
		}

		return self::FUNC_ARG_COUNT[$fname][1] === INF;
	}
}

View file

@ -13,7 +13,7 @@ use Psr\Log\LoggerInterface;
/**
* A parser that transforms the text of the filter into a parse tree.
*/
class AFPTreeParser {
class AFPTreeParser extends AFPTransitionBase {
/**
* @var array[] Contains the AFPTokens for the code being parsed
*/
@ -22,10 +22,6 @@ class AFPTreeParser {
* @var AFPToken The current token
*/
public $mCur;
/**
* @var int The position of the current token
*/
public $mPos;
/**
* @var string|null The ID of the filter being parsed, if available. Can also be "global-$ID"
*/
@ -735,46 +731,4 @@ class AFPTreeParser {
$this->move();
return $result;
}
/**
* Check that a built-in function has been provided the right amount of arguments
*
* @param array $args The arguments supplied to the function
* @param string $func The function name
* @throws AFPUserVisibleException
* @see AbuseFilterParser::checkArgCount()
* @todo This is a duplicate of AbuseFilter::checkEnoughArguments, and such duplication
* should be avoided when merging the parsers.
*/
protected function checkArgCount( $args, $func ) {
if ( !array_key_exists( $func, AbuseFilterParser::FUNC_ARG_COUNT ) ) {
throw new InvalidArgumentException( "$func is not a valid function." );
}
list( $min, $max ) = AbuseFilterParser::FUNC_ARG_COUNT[ $func ];
if ( count( $args ) < $min ) {
throw new AFPUserVisibleException(
$min === 1 ? 'noparams' : 'notenoughargs',
$this->mCur->pos,
[ $func, $min, count( $args ) ]
);
} elseif ( count( $args ) > $max ) {
throw new AFPUserVisibleException(
'toomanyargs',
$this->mCur->pos,
[ $func, $max, count( $args ) ]
);
}
}
/**
* @param string $fname
* @return bool
* @see AbuseFilterParser::functionIsVariadic
*/
protected function functionIsVariadic( $fname ) {
if ( !array_key_exists( $fname, AbuseFilterParser::FUNC_ARG_COUNT ) ) {
throw new InvalidArgumentException( "Function $fname is not valid" );
}
return AbuseFilterParser::FUNC_ARG_COUNT[$fname][1] === INF;
}
}

View file

@ -3,15 +3,11 @@
use Psr\Log\LoggerInterface;
use Wikimedia\Equivset\Equivset;
class AbuseFilterParser {
class AbuseFilterParser extends AFPTransitionBase {
/**
* @var array[] Contains the AFPTokens for the code being parsed
*/
public $mTokens;
/**
* @var int The position of the current token
*/
public $mPos;
/**
* @var bool Are we inside a short circuit evaluation?
*/
@ -65,76 +61,6 @@ class AbuseFilterParser {
*/
protected $statsd;
public const FUNCTIONS = [
'lcase' => 'funcLc',
'ucase' => 'funcUc',
'length' => 'funcLen',
'string' => 'castString',
'int' => 'castInt',
'float' => 'castFloat',
'bool' => 'castBool',
'norm' => 'funcNorm',
'ccnorm' => 'funcCCNorm',
'ccnorm_contains_any' => 'funcCCNormContainsAny',
'ccnorm_contains_all' => 'funcCCNormContainsAll',
'specialratio' => 'funcSpecialRatio',
'rmspecials' => 'funcRMSpecials',
'rmdoubles' => 'funcRMDoubles',
'rmwhitespace' => 'funcRMWhitespace',
'count' => 'funcCount',
'rcount' => 'funcRCount',
'get_matches' => 'funcGetMatches',
'ip_in_range' => 'funcIPInRange',
'contains_any' => 'funcContainsAny',
'contains_all' => 'funcContainsAll',
'equals_to_any' => 'funcEqualsToAny',
'substr' => 'funcSubstr',
'strlen' => 'funcLen',
'strpos' => 'funcStrPos',
'str_replace' => 'funcStrReplace',
'rescape' => 'funcStrRegexEscape',
'set' => 'funcSetVar',
'set_var' => 'funcSetVar',
'sanitize' => 'funcSanitize',
];
/**
* The minimum and maximum amount of arguments required by each function.
* @var int[][]
*/
const FUNC_ARG_COUNT = [
'lcase' => [ 1, 1 ],
'ucase' => [ 1, 1 ],
'length' => [ 1, 1 ],
'string' => [ 1, 1 ],
'int' => [ 1, 1 ],
'float' => [ 1, 1 ],
'bool' => [ 1, 1 ],
'norm' => [ 1, 1 ],
'ccnorm' => [ 1, 1 ],
'ccnorm_contains_any' => [ 2, INF ],
'ccnorm_contains_all' => [ 2, INF ],
'specialratio' => [ 1, 1 ],
'rmspecials' => [ 1, 1 ],
'rmdoubles' => [ 1, 1 ],
'rmwhitespace' => [ 1, 1 ],
'count' => [ 1, 2 ],
'rcount' => [ 1, 2 ],
'get_matches' => [ 2, 2 ],
'ip_in_range' => [ 2, 2 ],
'contains_any' => [ 2, INF ],
'contains_all' => [ 2, INF ],
'equals_to_any' => [ 2, INF ],
'substr' => [ 2, 3 ],
'strlen' => [ 1, 1 ],
'strpos' => [ 2, 3 ],
'str_replace' => [ 3, 3 ],
'rescape' => [ 1, 1 ],
'set' => [ 2, 2 ],
'set_var' => [ 2, 2 ],
'sanitize' => [ 1, 1 ],
];
// Functions that affect parser state, and shouldn't be cached.
public const ACTIVE_FUNCTIONS = [
'funcSetVar',
@ -1166,37 +1092,6 @@ class AbuseFilterParser {
return $this->mVariables->getVar( $var, $flags, $this->mFilter );
}
/**
* Check whether the given name refers to a built-in variable, including
* deprecated and disabled variables.
*
* @param string $varname
* @return bool
*/
protected function isBuiltinVar( $varname ) {
$builderValues = AbuseFilter::getBuilderValues();
$deprecatedVars = AbuseFilter::getDeprecatedVariables();
return array_key_exists( $varname, $builderValues['vars'] ) ||
array_key_exists( $varname, AbuseFilter::DISABLED_VARS ) ||
array_key_exists( $varname, $deprecatedVars );
}
/**
* Check whether the given name is a reserved identifier, e.g. the name of a built-in variable,
* function, or keyword.
*
* @param string $name
* @return bool
*/
protected function isReservedIdentifier( $name ) {
return $this->isBuiltinVar( $name ) ||
array_key_exists( $name, self::FUNCTIONS ) ||
// We need to check for true, false, if/then/else etc. because, even if they have a different
// AFPToken type, they may be used inside set/set_var()
in_array( $name, AbuseFilterTokenizer::KEYWORDS, true );
}
/**
* @param string $name
* @param mixed $value
@ -1209,33 +1104,6 @@ class AbuseFilterParser {
$this->mVariables->setVar( $name, $value );
}
/**
* Check that a built-in function has been provided the right amount of arguments
*
* @param array $args The arguments supplied to the function
* @param string $func The function name
* @throws AFPUserVisibleException
*/
protected function checkArgCount( $args, $func ) {
if ( !array_key_exists( $func, self::FUNC_ARG_COUNT ) ) {
throw new InvalidArgumentException( "$func is not a valid function." );
}
list( $min, $max ) = self::FUNC_ARG_COUNT[ $func ];
if ( count( $args ) < $min ) {
throw new AFPUserVisibleException(
$min === 1 ? 'noparams' : 'notenoughargs',
$this->mCur->pos,
[ $func, $min, count( $args ) ]
);
} elseif ( count( $args ) > $max ) {
throw new AFPUserVisibleException(
'toomanyargs',
$this->mCur->pos,
[ $func, $max, count( $args ) ]
);
}
}
/**
* Helper to call a built-in function.
*
@ -1964,15 +1832,4 @@ class AbuseFilterParser {
}
}
/**
* @param string $fname
* @return bool
* @see AFPTreeParser::functionIsVariadic
*/
protected function functionIsVariadic( $fname ) {
if ( !array_key_exists( $fname, self::FUNC_ARG_COUNT ) ) {
throw new InvalidArgumentException( "Function $fname is not valid" );
}
return self::FUNC_ARG_COUNT[$fname][1] === INF;
}
}

View file

@ -25,6 +25,7 @@
*
* @covers AbuseFilterCachingParser
* @covers AFPTreeParser
* @covers AFPTransitionBase
* @covers AFPTreeNode
* @covers AFPParserState
* @covers AbuseFilterParser

View file

@ -30,6 +30,7 @@ use Psr\Log\NullLogger;
*
* @covers AbuseFilterCachingParser
* @covers AFPTreeParser
* @covers AFPTransitionBase
* @covers AFPTreeNode
* @covers AFPParserState
* @covers AbuseFilterParser