Remove old number syntax

Bug: T212730
Change-Id: I7573da1683efc83b5002b8948c97dd7f6658a488
This commit is contained in:
Daimona Eaytoy 2019-11-13 14:26:46 +01:00 committed by Jforrester
parent 1bac110205
commit b9a1e86245
3 changed files with 8 additions and 61 deletions

View file

@ -7,19 +7,15 @@ use Psr\Log\LoggerInterface;
*/
class AbuseFilterTokenizer {
/** @var int Tokenizer cache version. Increment this when changing the syntax. */
public const CACHE_VERSION = 3;
public const CACHE_VERSION = 4;
private const COMMENT_START_RE = '/\s*\/\*/A';
private const ID_SYMBOL_RE = '/[0-9A-Za-z_]+/A';
public const OPERATOR_RE =
'/(\!\=\=|\!\=|\!|\*\*|\*|\/|\+|\-|%|&|\||\^|\:\=|\?|\:|\<\=|\<|\>\=|\>|\=\=\=|\=\=|\=)/A';
/** @deprecated In favour of V2 */
public const RADIX_RE = '/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([bxo])?(?![a-z])/Au';
private const BASE = '0(?<base>[xbo])';
private const DIGIT = '[0-9A-Fa-f]';
private const DIGITS = self::DIGIT . '+' . '(?:\.\d*)?|\.\d+';
// New numbers regex. Note that the last lookahead can be changed to (?!self::DIGIT) once we
// drop the old syntax
private const RADIX_RE_V2 = '/(?:' . self::BASE . ')?(?<input>' . self::DIGITS . ')(?!\w)/Au';
private const RADIX_RE = '/(?:' . self::BASE . ')?(?<input>' . self::DIGITS . ')(?!\w)/Au';
private const WHITESPACE = "\011\012\013\014\015\040";
// Order is important. The punctuation-matching regex requires that
@ -188,52 +184,12 @@ class AbuseFilterTokenizer {
// Numbers
$matchesv2 = [];
if ( preg_match( self::RADIX_RE_V2, $code, $matchesv2, 0, $offset ) ) {
// Experimental new syntax for non-decimal numbers, T212730
if ( preg_match( self::RADIX_RE, $code, $matchesv2, 0, $offset ) ) {
$token = $matchesv2[0];
$baseChar = $matchesv2['base'];
$input = $matchesv2['input'];
$base = $baseChar ? self::BASES[$baseChar] : 10;
if ( preg_match( self::BASE_CHARS_RES[$base], $input ) ) {
if ( $base !== 10 ) {
// This is to check that the new syntax is working. Remove when removing the old syntax
$this->logger->info(
'Successfully parsed a non-decimal number with new syntax. ' .
'Base: {number_base}, number: {number_input}',
[ 'number_base' => $base, 'number_input' => $input ]
);
}
$num = $base !== 10 ? base_convert( $input, $base, 10 ) : $input;
$offset += strlen( $token );
return ( strpos( $input, '.' ) !== false )
? new AFPToken( AFPToken::TFLOAT, floatval( $num ), $start )
: new AFPToken( AFPToken::TINT, intval( $num ), $start );
}
}
if ( preg_match( self::RADIX_RE, $code, $matches, 0, $offset ) ) {
list( $token, $input ) = $matches;
$baseChar = $matches[2] ?? null;
// Sometimes the base char gets mixed in with the rest of it because
// the regex targets hex, too.
// This mostly happens with binary
if ( !$baseChar && !empty( self::BASES[ substr( $input, - 1 ) ] ) ) {
$baseChar = substr( $input, - 1, 1 );
$input = substr( $input, 0, - 1 );
}
$base = $baseChar ? self::BASES[$baseChar] : 10;
// Check against the appropriate character class for input validation
if ( preg_match( self::BASE_CHARS_RES[$base], $input ) ) {
if ( $base !== 10 ) {
// Old syntax, this is deprecated
$this->logger->warning(
'DEPRECATED! This syntax for non-decimal numbers has been deprecated in 1.34 and will ' .
'be removed in 1.35. Please switch to the new syntax, which is the same ' .
'as PHP\'s. Found number with base: {number_base}, integer part: {number_input}.',
[ 'number_base' => $base, 'number_input' => $input ]
);
}
$num = $base !== 10 ? base_convert( $input, $base, 10 ) : $input;
$offset += strlen( $token );
return ( strpos( $input, '.' ) !== false )

View file

@ -1,8 +1,9 @@
/* Old syntax, deprecated */
ax = 10 & 123x === 291 & Fx = 15 & 10o = 8 & 1o === 1 & 10b = 2 & 101010b = 42
/* Things that are NOT numbers; */
0xfoo := 'foobar';
0b10bar := 'bar';
&
0xfoo === 'foobar' & 0b10bar === 'bar' &
/* New syntax */
/* Actual numbers */
0x1A === 0x1a & 0x1a === 26 & 0xa === 10 & 0b11111111 === 255 & 0o123 === 83 & 0x123 === 291 & 0xF === 15 &
0o10 === 8 & 0o1 === 1 & 0b101010 === 42 & 0b101010 === 0x2a & 0x2a === 0o52

View file

@ -158,16 +158,6 @@ class AbuseFilterParserTest extends AbuseFilterParserTestCase {
$this->assertEquals( $operatorRe, AbuseFilterTokenizer::OPERATOR_RE );
}
/**
 * Guard against AbuseFilterTokenizer::RADIX_RE drifting away from the base
 * characters used as keys of AbuseFilterTokenizer::BASES: the regex's base
 * character class must list exactly those keys, in the same order.
 */
public function testRadixRe() {
	// Concatenate the base characters in their declaration order
	$baseChars = implode( '', array_keys( AbuseFilterTokenizer::BASES ) );
	// Rebuild the expected pattern around that character class
	$expected = '/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([' . $baseChars . '])?(?![a-z])/Au';
	$this->assertEquals( $expected, AbuseFilterTokenizer::RADIX_RE );
}
/**
* Ensure the number of conditions counted for given expressions is right.
*