Replace preg_replace_callback with strtr in CommentParser

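The behavior is unchanged. The integer conversion merely moves from the
call sites into the untransformDigits helper, in both the PHP and the
JavaScript implementation.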

I think performance is not a concern here, and it wasn't my motivation
either. But I hope this makes the code easier to read and to reason
about.

I added a pure unit test case (without involving an actual Language
object) to cover the previously uncovered digits feature.

Change-Id: I6a0fc86035817eabb42b55e58183ae094c052aa6
This commit is contained in:
thiemowmde 2023-10-30 09:20:27 +01:00
parent f2265f918f
commit 10dcd1f847
5 changed files with 35 additions and 35 deletions

View file

@ -317,7 +317,7 @@ class CommentParser {
*
* @param string $contLangVariant Content language variant
* @param string $format Date format, as used by MediaWiki
* @param string[]|null $digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
* @param array<int,string>|null $digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
* @param string $localTimezone Local timezone IANA name, e.g. `America/New_York`
* @param array $tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
* for the local timezone, e.g. [ 'EDT' => 'EDT', 'EST' => 'EST' ]
@ -326,17 +326,8 @@ class CommentParser {
private function getTimestampParser(
string $contLangVariant, string $format, ?array $digits, string $localTimezone, array $tzAbbrs
): callable {
$untransformDigits = static function ( string $text ) use ( $digits ) {
if ( !$digits ) {
return $text;
}
return preg_replace_callback(
'/[' . implode( '', $digits ) . ']/u',
static function ( array $m ) use ( $digits ) {
return (string)array_search( $m[0], $digits, true );
},
$text
);
$untransformDigits = static function ( string $text ) use ( $digits ): int {
return (int)( $digits ? strtr( $text, array_flip( $digits ) ) : $text );
};
$formatLength = strlen( $format );
@ -418,7 +409,7 @@ class CommentParser {
break;
case 'd':
case 'j':
$day = intval( $untransformDigits( $text ) );
$day = $untransformDigits( $text );
break;
case 'D':
case 'l':
@ -440,21 +431,21 @@ class CommentParser {
break;
case 'm':
case 'n':
$monthIdx = intval( $untransformDigits( $text ) ) - 1;
$monthIdx = $untransformDigits( $text ) - 1;
break;
case 'Y':
$year = intval( $untransformDigits( $text ) );
$year = $untransformDigits( $text );
break;
case 'xkY':
// Thai year
$year = intval( $untransformDigits( $text ) ) - 543;
$year = $untransformDigits( $text ) - 543;
break;
case 'G':
case 'H':
$hour = intval( $untransformDigits( $text ) );
$hour = $untransformDigits( $text );
break;
case 'i':
$minute = intval( $untransformDigits( $text ) );
$minute = $untransformDigits( $text );
break;
case 's':
// Seconds - unused, because most timestamp formats omit them

View file

@ -291,16 +291,15 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits
}
}
/**
* @param {string} text
* @return {number}
*/
function untransformDigits( text ) {
if ( !digits ) {
return text;
}
return text.replace(
return Number( digits ? text.replace(
new RegExp( '[' + digits.join( '' ) + ']', 'g' ),
function ( m ) {
return digits.indexOf( m );
}
);
( m ) => digits.indexOf( m )
) : text );
}
var parser = this;
@ -339,7 +338,7 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits
break;
case 'd':
case 'j':
day = Number( untransformDigits( text ) );
day = untransformDigits( text );
break;
case 'D':
case 'l':
@ -360,21 +359,21 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits
break;
case 'm':
case 'n':
monthIdx = Number( untransformDigits( text ) ) - 1;
monthIdx = untransformDigits( text ) - 1;
break;
case 'Y':
year = Number( untransformDigits( text ) );
year = untransformDigits( text );
break;
case 'xkY':
// Thai year
year = Number( untransformDigits( text ) ) - 543;
year = untransformDigits( text ) - 543;
break;
case 'G':
case 'H':
hour = Number( untransformDigits( text ) );
hour = untransformDigits( text );
break;
case 'i':
minute = Number( untransformDigits( text ) );
minute = untransformDigits( text );
break;
case 's':
// Seconds - unused, because most timestamp formats omit them

View file

@ -1,20 +1,30 @@
[
{
"format": "Y n j D H i",
"digits": null,
"data": [ null, "2011", "2", "3", "unused", "04", "05", "UTC" ],
"expected": "2011-02-03T04:05:00+00:00",
"message": "Date is parsed"
},
{
"format": "xkY xg d \"asdf\" G i",
"digits": null,
"data": [ null, "2554", "February", "03", "4", "05", "UTC" ],
"expected": "2011-02-03T04:05:00+00:00",
"message": "Date is parsed"
},
{
"format": "H i n j Y",
"digits": null,
"data": [ null, "04", "05", "2", "3", "2011", "UTC" ],
"expected": "2011-02-03T04:05:00+00:00",
"message": "Date is parsed"
},
{
"format": "Y-m-d",
"digits": [ "⁰", "¹", "²" ],
"data": [ null, "²⁰²¹", "¹²", "⁰¹", "UTC" ],
"expected": "2021-12-01T00:00:00+00:00",
"message": "Localized digits are un-transformed"
}
]

View file

@ -140,7 +140,7 @@ class CommentParserTest extends IntegrationTestCase {
* @dataProvider provideTimestampParser
*/
public function testGetTimestampParser(
string $format, array $data, string $expected, string $message
string $format, ?array $digits, array $data, string $expected, string $message
): void {
/** @var CommentParser $parser */
$parser = TestingAccessWrapper::newFromObject(
@ -149,7 +149,7 @@ class CommentParserTest extends IntegrationTestCase {
$expected = new DateTimeImmutable( $expected );
$tsParser = $parser->getTimestampParser( 'en', $format, null, 'UTC', [ 'UTC' => 'UTC' ] );
$tsParser = $parser->getTimestampParser( 'en', $format, $digits, 'UTC', [ 'UTC' => 'UTC' ] );
static::assertEquals( $expected, $tsParser( $data )['date'], $message );
}

View file

@ -23,7 +23,7 @@ QUnit.test( '#getTimestampParser', function ( assert ) {
parser = new Parser( require( '../data-en.json' ) );
cases.forEach( function ( caseItem ) {
var tsParser = parser.getTimestampParser( 'en', caseItem.format, null, 'UTC', { UTC: 'UTC' } ),
var tsParser = parser.getTimestampParser( 'en', caseItem.format, caseItem.digits, 'UTC', { UTC: 'UTC' } ),
expectedDate = moment( caseItem.expected );
assert.true(