mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-09-24 02:38:16 +00:00
Merge "Add a base class for parser transition"
This commit is contained in:
commit
62f7b574f1
|
@ -159,6 +159,7 @@
|
|||
"AFPException": "includes/parser/AFPException.php",
|
||||
"AFPParserState": "includes/parser/AFPParserState.php",
|
||||
"AFPToken": "includes/parser/AFPToken.php",
|
||||
"AFPTransitionBase": "includes/parser/AFPTransitionBase.php",
|
||||
"AFPTreeNode": "includes/parser/AFPTreeNode.php",
|
||||
"AFPSyntaxTree": "includes/parser/AFPSyntaxTree.php",
|
||||
"AFPTreeParser": "includes/parser/AFPTreeParser.php",
|
||||
|
|
151
includes/parser/AFPTransitionBase.php
Normal file
151
includes/parser/AFPTransitionBase.php
Normal file
|
@ -0,0 +1,151 @@
|
|||
<?php
|
||||
/**
 * Base parse-related class to be used while the old parser is being phased out.
 *
 * It bundles what parser classes can inherit from a single place: the table of built-in
 * function names, the allowed argument counts for each of them, and helpers that validate
 * argument counts and recognise reserved identifiers.
 *
 * @internal This is a temporary class until things are settled down
 */
abstract class AFPTransitionBase {
	/**
	 * Built-in function names, each mapped to a handler name.
	 *
	 * NOTE(review): the values look like names of methods implemented by the concrete
	 * parser; nothing in this class calls them, so confirm against the subclasses.
	 *
	 * @var string[]
	 */
	public const FUNCTIONS = [
		'lcase' => 'funcLc',
		'ucase' => 'funcUc',
		'length' => 'funcLen',
		'string' => 'castString',
		'int' => 'castInt',
		'float' => 'castFloat',
		'bool' => 'castBool',
		'norm' => 'funcNorm',
		'ccnorm' => 'funcCCNorm',
		'ccnorm_contains_any' => 'funcCCNormContainsAny',
		'ccnorm_contains_all' => 'funcCCNormContainsAll',
		'specialratio' => 'funcSpecialRatio',
		'rmspecials' => 'funcRMSpecials',
		'rmdoubles' => 'funcRMDoubles',
		'rmwhitespace' => 'funcRMWhitespace',
		'count' => 'funcCount',
		'rcount' => 'funcRCount',
		'get_matches' => 'funcGetMatches',
		'ip_in_range' => 'funcIPInRange',
		'contains_any' => 'funcContainsAny',
		'contains_all' => 'funcContainsAll',
		'equals_to_any' => 'funcEqualsToAny',
		'substr' => 'funcSubstr',
		'strlen' => 'funcLen',
		'strpos' => 'funcStrPos',
		'str_replace' => 'funcStrReplace',
		'rescape' => 'funcStrRegexEscape',
		'set' => 'funcSetVar',
		'set_var' => 'funcSetVar',
		'sanitize' => 'funcSanitize',
	];

	/**
	 * The minimum and maximum amount of arguments required by each function,
	 * stored as [ min, max ]. A maximum of INF (a float) marks a function that
	 * accepts any number of arguments above the minimum.
	 *
	 * @var (int|float)[][]
	 */
	public const FUNC_ARG_COUNT = [
		'lcase' => [ 1, 1 ],
		'ucase' => [ 1, 1 ],
		'length' => [ 1, 1 ],
		'string' => [ 1, 1 ],
		'int' => [ 1, 1 ],
		'float' => [ 1, 1 ],
		'bool' => [ 1, 1 ],
		'norm' => [ 1, 1 ],
		'ccnorm' => [ 1, 1 ],
		'ccnorm_contains_any' => [ 2, INF ],
		'ccnorm_contains_all' => [ 2, INF ],
		'specialratio' => [ 1, 1 ],
		'rmspecials' => [ 1, 1 ],
		'rmdoubles' => [ 1, 1 ],
		'rmwhitespace' => [ 1, 1 ],
		'count' => [ 1, 2 ],
		'rcount' => [ 1, 2 ],
		'get_matches' => [ 2, 2 ],
		'ip_in_range' => [ 2, 2 ],
		'contains_any' => [ 2, INF ],
		'contains_all' => [ 2, INF ],
		'equals_to_any' => [ 2, INF ],
		'substr' => [ 2, 3 ],
		'strlen' => [ 1, 1 ],
		'strpos' => [ 2, 3 ],
		'str_replace' => [ 3, 3 ],
		'rescape' => [ 1, 1 ],
		'set' => [ 2, 2 ],
		'set_var' => [ 2, 2 ],
		'sanitize' => [ 1, 1 ],
	];

	/**
	 * @var int The position of the current token
	 */
	protected $mPos;

	/**
	 * Check that a built-in function has been provided the right amount of arguments
	 *
	 * @param array $args The arguments supplied to the function
	 * @param string $func The function name
	 * @throws InvalidArgumentException If $func is not a key of FUNC_ARG_COUNT
	 * @throws AFPUserVisibleException If too few or too many arguments were supplied
	 */
	protected function checkArgCount( $args, $func ) {
		if ( !array_key_exists( $func, self::FUNC_ARG_COUNT ) ) {
			throw new InvalidArgumentException( "$func is not a valid function." );
		}

		[ $min, $max ] = self::FUNC_ARG_COUNT[$func];
		$given = count( $args );

		// NOTE(review): the previous per-parser copies of this check passed
		// $this->mCur->pos as the error position; confirm $this->mPos is equivalent.
		if ( $given < $min ) {
			throw new AFPUserVisibleException(
				// Functions needing exactly one argument get a dedicated message key
				$min === 1 ? 'noparams' : 'notenoughargs',
				$this->mPos,
				[ $func, $min, $given ]
			);
		}

		// When $max is INF this comparison can never be true, so variadic
		// functions are never rejected for having too many arguments.
		if ( $given > $max ) {
			throw new AFPUserVisibleException(
				'toomanyargs',
				$this->mPos,
				[ $func, $max, $given ]
			);
		}
	}

	/**
	 * Check whether the given name is a reserved identifier, e.g. the name of a built-in variable,
	 * function, or keyword.
	 *
	 * @param string $name
	 * @return bool
	 */
	protected function isReservedIdentifier( $name ) {
		return $this->isBuiltinVar( $name ) ||
			array_key_exists( $name, self::FUNCTIONS ) ||
			// We need to check for true, false, if/then/else etc. because, even if they have a different
			// AFPToken type, they may be used inside set/set_var()
			in_array( $name, AbuseFilterTokenizer::KEYWORDS, true );
	}

	/**
	 * Check whether the given name refers to a built-in variable, including
	 * deprecated and disabled variables.
	 *
	 * @param string $varname
	 * @return bool
	 */
	protected function isBuiltinVar( $varname ) {
		$builderValues = AbuseFilter::getBuilderValues();
		$deprecatedVars = AbuseFilter::getDeprecatedVariables();

		return array_key_exists( $varname, $builderValues['vars'] ) ||
			array_key_exists( $varname, AbuseFilter::DISABLED_VARS ) ||
			array_key_exists( $varname, $deprecatedVars );
	}

	/**
	 * Tell whether a built-in function is variadic, i.e. whether its maximum
	 * argument count in FUNC_ARG_COUNT is INF.
	 *
	 * @param string $fname
	 * @return bool
	 * @throws InvalidArgumentException If $fname is not a key of FUNC_ARG_COUNT
	 */
	protected function functionIsVariadic( $fname ) {
		if ( !array_key_exists( $fname, self::FUNC_ARG_COUNT ) ) {
			throw new InvalidArgumentException( "Function $fname is not valid" );
		}
		return self::FUNC_ARG_COUNT[$fname][1] === INF;
	}
}
|
|
@ -13,7 +13,7 @@ use Psr\Log\LoggerInterface;
|
|||
/**
|
||||
* A parser that transforms the text of the filter into a parse tree.
|
||||
*/
|
||||
class AFPTreeParser {
|
||||
class AFPTreeParser extends AFPTransitionBase {
|
||||
/**
|
||||
* @var array[] Contains the AFPTokens for the code being parsed
|
||||
*/
|
||||
|
@ -22,10 +22,6 @@ class AFPTreeParser {
|
|||
* @var AFPToken The current token
|
||||
*/
|
||||
public $mCur;
|
||||
/**
|
||||
* @var int The position of the current token
|
||||
*/
|
||||
public $mPos;
|
||||
/**
|
||||
* @var string|null The ID of the filter being parsed, if available. Can also be "global-$ID"
|
||||
*/
|
||||
|
@ -735,46 +731,4 @@ class AFPTreeParser {
|
|||
$this->move();
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
 * Validate the number of arguments handed to a built-in function against the
 * [ min, max ] pair stored for it in AbuseFilterParser::FUNC_ARG_COUNT.
 *
 * @param array $args The arguments supplied to the function
 * @param string $func The function name
 * @throws InvalidArgumentException If $func has no entry in the argument-count table
 * @throws AFPUserVisibleException If fewer than min or more than max arguments were given
 * @see AbuseFilterParser::checkArgCount()
 * @todo This is a duplicate of AbuseFilter::checkEnoughArguments, and such duplication
 *  should be avoided when merging the parsers.
 */
protected function checkArgCount( $args, $func ) {
	$known = array_key_exists( $func, AbuseFilterParser::FUNC_ARG_COUNT );
	if ( !$known ) {
		throw new InvalidArgumentException( "$func is not a valid function." );
	}

	$bounds = AbuseFilterParser::FUNC_ARG_COUNT[ $func ];
	$min = $bounds[0];
	$max = $bounds[1];

	if ( count( $args ) < $min ) {
		// A minimum of exactly one argument has its own message key
		if ( $min === 1 ) {
			$messageKey = 'noparams';
		} else {
			$messageKey = 'notenoughargs';
		}
		throw new AFPUserVisibleException(
			$messageKey,
			$this->mCur->pos,
			[ $func, $min, count( $args ) ]
		);
	}

	if ( count( $args ) > $max ) {
		throw new AFPUserVisibleException(
			'toomanyargs',
			$this->mCur->pos,
			[ $func, $max, count( $args ) ]
		);
	}
}
|
||||
|
||||
/**
 * Report whether the maximum argument count recorded for a function in
 * AbuseFilterParser::FUNC_ARG_COUNT is INF.
 *
 * @param string $fname
 * @return bool
 * @throws InvalidArgumentException If $fname has no entry in the argument-count table
 * @see AbuseFilterParser::functionIsVariadic
 */
protected function functionIsVariadic( $fname ) {
	if ( array_key_exists( $fname, AbuseFilterParser::FUNC_ARG_COUNT ) ) {
		$maxArgs = AbuseFilterParser::FUNC_ARG_COUNT[$fname][1];
		return $maxArgs === INF;
	}

	throw new InvalidArgumentException( "Function $fname is not valid" );
}
|
||||
}
|
||||
|
|
|
@ -3,15 +3,11 @@
|
|||
use Psr\Log\LoggerInterface;
|
||||
use Wikimedia\Equivset\Equivset;
|
||||
|
||||
class AbuseFilterParser {
|
||||
class AbuseFilterParser extends AFPTransitionBase {
|
||||
/**
|
||||
* @var array[] Contains the AFPTokens for the code being parsed
|
||||
*/
|
||||
public $mTokens;
|
||||
/**
|
||||
* @var int The position of the current token
|
||||
*/
|
||||
public $mPos;
|
||||
/**
|
||||
* @var bool Are we inside a short circuit evaluation?
|
||||
*/
|
||||
|
@ -65,76 +61,6 @@ class AbuseFilterParser {
|
|||
*/
|
||||
protected $statsd;
|
||||
|
||||
/**
 * Built-in function names, each mapped to a handler name.
 *
 * NOTE(review): the values look like names of parser methods (the neighbouring
 * ACTIVE_FUNCTIONS list uses the same naming); confirm against the class body.
 *
 * @var string[]
 */
public const FUNCTIONS = [
	'lcase' => 'funcLc',
	'ucase' => 'funcUc',
	'length' => 'funcLen',
	'string' => 'castString',
	'int' => 'castInt',
	'float' => 'castFloat',
	'bool' => 'castBool',
	'norm' => 'funcNorm',
	'ccnorm' => 'funcCCNorm',
	'ccnorm_contains_any' => 'funcCCNormContainsAny',
	'ccnorm_contains_all' => 'funcCCNormContainsAll',
	'specialratio' => 'funcSpecialRatio',
	'rmspecials' => 'funcRMSpecials',
	'rmdoubles' => 'funcRMDoubles',
	'rmwhitespace' => 'funcRMWhitespace',
	'count' => 'funcCount',
	'rcount' => 'funcRCount',
	'get_matches' => 'funcGetMatches',
	'ip_in_range' => 'funcIPInRange',
	'contains_any' => 'funcContainsAny',
	'contains_all' => 'funcContainsAll',
	'equals_to_any' => 'funcEqualsToAny',
	'substr' => 'funcSubstr',
	'strlen' => 'funcLen',
	'strpos' => 'funcStrPos',
	'str_replace' => 'funcStrReplace',
	'rescape' => 'funcStrRegexEscape',
	'set' => 'funcSetVar',
	'set_var' => 'funcSetVar',
	'sanitize' => 'funcSanitize',
];

/**
 * The minimum and maximum amount of arguments required by each function,
 * stored as [ min, max ]. The maximum is the float INF for functions that
 * have no upper limit.
 *
 * @var (int|float)[][]
 */
public const FUNC_ARG_COUNT = [
	'lcase' => [ 1, 1 ],
	'ucase' => [ 1, 1 ],
	'length' => [ 1, 1 ],
	'string' => [ 1, 1 ],
	'int' => [ 1, 1 ],
	'float' => [ 1, 1 ],
	'bool' => [ 1, 1 ],
	'norm' => [ 1, 1 ],
	'ccnorm' => [ 1, 1 ],
	'ccnorm_contains_any' => [ 2, INF ],
	'ccnorm_contains_all' => [ 2, INF ],
	'specialratio' => [ 1, 1 ],
	'rmspecials' => [ 1, 1 ],
	'rmdoubles' => [ 1, 1 ],
	'rmwhitespace' => [ 1, 1 ],
	'count' => [ 1, 2 ],
	'rcount' => [ 1, 2 ],
	'get_matches' => [ 2, 2 ],
	'ip_in_range' => [ 2, 2 ],
	'contains_any' => [ 2, INF ],
	'contains_all' => [ 2, INF ],
	'equals_to_any' => [ 2, INF ],
	'substr' => [ 2, 3 ],
	'strlen' => [ 1, 1 ],
	'strpos' => [ 2, 3 ],
	'str_replace' => [ 3, 3 ],
	'rescape' => [ 1, 1 ],
	'set' => [ 2, 2 ],
	'set_var' => [ 2, 2 ],
	'sanitize' => [ 1, 1 ],
];
|
||||
|
||||
// Functions that affect parser state, and shouldn't be cached.
|
||||
public const ACTIVE_FUNCTIONS = [
|
||||
'funcSetVar',
|
||||
|
@ -1166,37 +1092,6 @@ class AbuseFilterParser {
|
|||
return $this->mVariables->getVar( $var, $flags, $this->mFilter );
|
||||
}
|
||||
|
||||
/**
 * Tell whether a name belongs to a built-in variable. A name counts as built-in
 * when it is a key of the builder values' 'vars' list, of AbuseFilter::DISABLED_VARS,
 * or of the deprecated variables list.
 *
 * @param string $varname
 * @return bool
 */
protected function isBuiltinVar( $varname ) {
	$builder = AbuseFilter::getBuilderValues();
	$deprecated = AbuseFilter::getDeprecatedVariables();

	if ( array_key_exists( $varname, $builder['vars'] ) ) {
		return true;
	}
	if ( array_key_exists( $varname, AbuseFilter::DISABLED_VARS ) ) {
		return true;
	}
	return array_key_exists( $varname, $deprecated );
}
|
||||
|
||||
/**
 * Tell whether a name is reserved: a built-in variable, a key of the FUNCTIONS
 * table, or one of the tokenizer's keywords.
 *
 * @param string $name
 * @return bool
 */
protected function isReservedIdentifier( $name ) {
	if ( $this->isBuiltinVar( $name ) ) {
		return true;
	}

	if ( array_key_exists( $name, self::FUNCTIONS ) ) {
		return true;
	}

	// We need to check for true, false, if/then/else etc. because, even if they have a different
	// AFPToken type, they may be used inside set/set_var()
	return in_array( $name, AbuseFilterTokenizer::KEYWORDS, true );
}
|
||||
|
||||
/**
|
||||
* @param string $name
|
||||
* @param mixed $value
|
||||
|
@ -1209,33 +1104,6 @@ class AbuseFilterParser {
|
|||
$this->mVariables->setVar( $name, $value );
|
||||
}
|
||||
|
||||
/**
 * Ensure a built-in function received a number of arguments within the
 * [ min, max ] range recorded for it in FUNC_ARG_COUNT.
 *
 * @param array $args The arguments supplied to the function
 * @param string $func The function name
 * @throws InvalidArgumentException If $func has no entry in FUNC_ARG_COUNT
 * @throws AFPUserVisibleException If the argument count is outside the allowed range
 */
protected function checkArgCount( $args, $func ) {
	if ( array_key_exists( $func, self::FUNC_ARG_COUNT ) === false ) {
		throw new InvalidArgumentException( "$func is not a valid function." );
	}

	$range = self::FUNC_ARG_COUNT[ $func ];
	$lowest = $range[0];
	$highest = $range[1];

	$tooFew = count( $args ) < $lowest;
	if ( $tooFew ) {
		// A minimum of exactly one argument has its own message key
		$key = $lowest === 1 ? 'noparams' : 'notenoughargs';
		throw new AFPUserVisibleException(
			$key,
			$this->mCur->pos,
			[ $func, $lowest, count( $args ) ]
		);
	}

	$tooMany = count( $args ) > $highest;
	if ( $tooMany ) {
		throw new AFPUserVisibleException(
			'toomanyargs',
			$this->mCur->pos,
			[ $func, $highest, count( $args ) ]
		);
	}
}
|
||||
|
||||
/**
|
||||
* Helper to call a built-in function.
|
||||
*
|
||||
|
@ -1964,15 +1832,4 @@ class AbuseFilterParser {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Report whether the maximum argument count recorded for a function in
 * FUNC_ARG_COUNT is INF.
 *
 * @param string $fname
 * @return bool
 * @throws InvalidArgumentException If $fname has no entry in FUNC_ARG_COUNT
 * @see AFPTreeParser::functionIsVariadic
 */
protected function functionIsVariadic( $fname ) {
	$isKnown = array_key_exists( $fname, self::FUNC_ARG_COUNT );
	if ( !$isKnown ) {
		throw new InvalidArgumentException( "Function $fname is not valid" );
	}

	$upperBound = self::FUNC_ARG_COUNT[$fname][1];
	return $upperBound === INF;
}
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
*
|
||||
* @covers AbuseFilterCachingParser
|
||||
* @covers AFPTreeParser
|
||||
* @covers AFPTransitionBase
|
||||
* @covers AFPTreeNode
|
||||
* @covers AFPParserState
|
||||
* @covers AbuseFilterParser
|
||||
|
|
|
@ -30,6 +30,7 @@ use Psr\Log\NullLogger;
|
|||
*
|
||||
* @covers AbuseFilterCachingParser
|
||||
* @covers AFPTreeParser
|
||||
* @covers AFPTransitionBase
|
||||
* @covers AFPTreeNode
|
||||
* @covers AFPParserState
|
||||
* @covers AbuseFilterParser
|
||||
|
|
Loading…
Reference in a new issue