diff --git a/includes/parser/AbuseFilterTokenizer.php b/includes/parser/AbuseFilterTokenizer.php index 41f186aeb..b814b6705 100644 --- a/includes/parser/AbuseFilterTokenizer.php +++ b/includes/parser/AbuseFilterTokenizer.php @@ -7,19 +7,15 @@ use Psr\Log\LoggerInterface; */ class AbuseFilterTokenizer { /** @var int Tokenizer cache version. Increment this when changing the syntax. */ - public const CACHE_VERSION = 3; + public const CACHE_VERSION = 4; private const COMMENT_START_RE = '/\s*\/\*/A'; private const ID_SYMBOL_RE = '/[0-9A-Za-z_]+/A'; public const OPERATOR_RE = '/(\!\=\=|\!\=|\!|\*\*|\*|\/|\+|\-|%|&|\||\^|\:\=|\?|\:|\<\=|\<|\>\=|\>|\=\=\=|\=\=|\=)/A'; - /** @deprecated In favour of V2 */ - public const RADIX_RE = '/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([bxo])?(?![a-z])/Au'; private const BASE = '0(?<base>[xbo])'; private const DIGIT = '[0-9A-Fa-f]'; private const DIGITS = self::DIGIT . '+' . '(?:\.\d*)?|\.\d+'; - // New numbers regex. Note that the last lookahead can be changed to (?!self::DIGIT) once we - // drop the old syntax - private const RADIX_RE_V2 = '/(?:' . self::BASE . ')?(?<input>' . self::DIGITS . ')(?!\w)/Au'; + private const RADIX_RE = '/(?:' . self::BASE . ')?(?<input>' . self::DIGITS . ')(?!\w)/Au'; private const WHITESPACE = "\011\012\013\014\015\040"; // Order is important. The punctuation-matching regex requires that @@ -188,52 +184,12 @@ class AbuseFilterTokenizer { // Numbers $matchesv2 = []; - if ( preg_match( self::RADIX_RE_V2, $code, $matchesv2, 0, $offset ) ) { - // Experimental new syntax for non-decimal numbers, T212730 + if ( preg_match( self::RADIX_RE, $code, $matchesv2, 0, $offset ) ) { $token = $matchesv2[0]; $baseChar = $matchesv2['base']; $input = $matchesv2['input']; $base = $baseChar ? self::BASES[$baseChar] : 10; if ( preg_match( self::BASE_CHARS_RES[$base], $input ) ) { - if ( $base !== 10 ) { - // This is to check that the new syntax is working.
Remove when removing the old syntax - $this->logger->info( - 'Successfully parsed a non-decimal number with new syntax. ' . - 'Base: {number_base}, number: {number_input}', - [ 'number_base' => $base, 'number_input' => $input ] - ); - } - $num = $base !== 10 ? base_convert( $input, $base, 10 ) : $input; - $offset += strlen( $token ); - return ( strpos( $input, '.' ) !== false ) - ? new AFPToken( AFPToken::TFLOAT, floatval( $num ), $start ) - : new AFPToken( AFPToken::TINT, intval( $num ), $start ); - } - } - if ( preg_match( self::RADIX_RE, $code, $matches, 0, $offset ) ) { - list( $token, $input ) = $matches; - $baseChar = $matches[2] ?? null; - // Sometimes the base char gets mixed in with the rest of it because - // the regex targets hex, too. - // This mostly happens with binary - if ( !$baseChar && !empty( self::BASES[ substr( $input, - 1 ) ] ) ) { - $baseChar = substr( $input, - 1, 1 ); - $input = substr( $input, 0, - 1 ); - } - - $base = $baseChar ? self::BASES[$baseChar] : 10; - - // Check against the appropriate character class for input validation - if ( preg_match( self::BASE_CHARS_RES[$base], $input ) ) { - if ( $base !== 10 ) { - // Old syntax, this is deprecated - $this->logger->warning( - 'DEPRECATED! This syntax for non-decimal numbers has been deprecated in 1.34 and will ' . - 'be removed in 1.35. Please switch to the new syntax, which is the same ' . - 'as PHP\'s. Found number with base: {number_base}, integer part: {number_input}.', - [ 'number_base' => $base, 'number_input' => $input ] - ); - } $num = $base !== 10 ? base_convert( $input, $base, 10 ) : $input; $offset += strlen( $token ); return ( strpos( $input, '.' 
) !== false ) diff --git a/tests/parserTests/numbers.t b/tests/parserTests/numbers.t index 7bfe9cb1a..da9474b44 100644 --- a/tests/parserTests/numbers.t +++ b/tests/parserTests/numbers.t @@ -1,8 +1,9 @@ -/* Old syntax, deprecated */ -ax = 10 & 123x === 291 & Fx = 15 & 10o = 8 & 1o === 1 & 10b = 2 & 101010b = 42 +/* Things that are NOT numbers; */ +0xfoo := 'foobar'; +0b10bar := 'bar'; -& +0xfoo === 'foobar' & 0b10bar === 'bar' & -/* New syntax */ +/* Actual numbers */ 0x1A === 0x1a & 0x1a === 26 & 0xa === 10 & 0b11111111 === 255 & 0o123 === 83 & 0x123 === 291 & 0xF === 15 & 0o10 === 8 & 0o1 === 1 & 0b101010 === 42 & 0b101010 === 0x2a & 0x2a === 0o52 diff --git a/tests/phpunit/unit/AbuseFilterParserTest.php b/tests/phpunit/unit/AbuseFilterParserTest.php index 51e19456e..e294ae517 100644 --- a/tests/phpunit/unit/AbuseFilterParserTest.php +++ b/tests/phpunit/unit/AbuseFilterParserTest.php @@ -158,16 +158,6 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase { $this->assertEquals( $operatorRe, AbuseFilterTokenizer::OPERATOR_RE ); } - /** - * Ensure that AbuseFilterTokenizer::RADIX_RE matches the contents - * and order of AbuseFilterTokenizer::$bases. - */ - public function testRadixRe() { - $baseClass = implode( '', array_keys( AbuseFilterTokenizer::BASES ) ); - $radixRe = "/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([$baseClass])?(?![a-z])/Au"; - $this->assertEquals( $radixRe, AbuseFilterTokenizer::RADIX_RE ); - } - /** * Ensure the number of conditions counted for given expressions is right. *