diff --git a/includes/CommentParser.php b/includes/CommentParser.php index b07c6f07a..00587eb8c 100644 --- a/includes/CommentParser.php +++ b/includes/CommentParser.php @@ -270,23 +270,23 @@ class CommentParser { * of matching the regexp returned by getTimestampRegexp() * * @param string $format Date format, as used by MediaWiki - * @param string|null $digits Localised digits from 0 to 9, e.g. `0123456789` + * @param array|null $digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]` * @param string $localTimezone Local timezone IANA name, e.g. `America/New_York` * @param array $tzAbbrs Map of localised timezone abbreviations to IANA abbreviations * for the local timezone, e.g. [ 'EDT' => 'EDT', 'EST' => 'EST' ] * @return callable Parser function */ private function getTimestampParser( - string $format, ?string $digits, string $localTimezone, array $tzAbbrs + string $format, ?array $digits, string $localTimezone, array $tzAbbrs ) : callable { $untransformDigits = function ( string $text ) use ( $digits ) { if ( !$digits ) { return $text; } return preg_replace_callback( - '/[' . $digits . ']/', + '/[' . implode( '', $digits ) . ']/u', function ( array $m ) use ( $digits ) { - return (string)strpos( $digits, $m[0] ); + return (string)array_search( $m[0], $digits ); }, $text ); @@ -453,7 +453,7 @@ class CommentParser { public function getLocalTimestampRegexp() : string { return $this->getTimestampRegexp( $this->dateFormat, - $this->digits ? "[$this->digits]" : '\\d', + $this->digits ? '[' . implode( '', $this->digits ) . ']' : '\\d', $this->timezones ); } diff --git a/includes/Data.php b/includes/Data.php index 8356ebd09..28b1013fd 100644 --- a/includes/Data.php +++ b/includes/Data.php @@ -43,10 +43,8 @@ class Data { $data['dateFormat'] = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) ); - // TODO: We probably shouldn't assume that each digit can be represented by a single BMP - // codepoint in every language (although it seems to be true right now). $data['digits'] = $config->get( 'TranslateNumerals' ) ? - $lang->formatNum( '0123456789', true ) : + preg_split( '//u', $lang->formatNum( '0123456789', true ), -1, PREG_SPLIT_NO_EMPTY ) : null; // ApiQuerySiteinfo diff --git a/modules/Parser.js b/modules/Parser.js index 6dfe16ff9..781027d53 100644 --- a/modules/Parser.js +++ b/modules/Parser.js @@ -186,7 +186,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs ) * * @private * @param {string} format Date format, as used by MediaWiki - * @param {string|null} digits Localised digits from 0 to 9, e.g. `0123456789` + * @param {array|null} digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]` * @param {string} localTimezone Local timezone IANA name, e.g. `America/New_York` * @param {Object} tzAbbrs Map of localised timezone abbreviations to IANA abbreviations * for the local timezone, e.g. `{EDT: "EDT", EST: "EST"}` @@ -246,7 +246,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone, return text; } return text.replace( - new RegExp( '[' + digits + ']', 'g' ), + new RegExp( '[' + digits.join( '' ) + ']', 'g' ), function ( m ) { return digits.indexOf( m ); } @@ -371,7 +371,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone, Parser.prototype.getLocalTimestampRegexp = function () { return this.getTimestampRegexp( data.dateFormat, - data.digits ? '[' + data.digits + ']' : '\\d', + data.digits ? '[' + data.digits.join( '' ) + ']' : '\\d', data.timezones ); }; diff --git a/tests/data/arwiki-data.json b/tests/data/arwiki-data.json index c25728214..e3e1baa33 100644 --- a/tests/data/arwiki-data.json +++ b/tests/data/arwiki-data.json @@ -1,6 +1,17 @@ { "dateFormat": "H:i، j xg Y", - "digits": "0123456789", + "digits": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], "localTimezone": "UTC", "specialContributionsName": "مساهمات", "timezones": { diff --git a/tests/data/enwiki-data.json b/tests/data/enwiki-data.json index 1f784337c..87087b542 100644 --- a/tests/data/enwiki-data.json +++ b/tests/data/enwiki-data.json @@ -1,6 +1,17 @@ { "dateFormat": "H:i, j F Y", - "digits": "0123456789", + "digits": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], "localTimezone": "UTC", "specialContributionsName": "Contributions", "timezones": { diff --git a/tests/data/frwiki-data.json b/tests/data/frwiki-data.json index 6729eb1d7..56783d472 100644 --- a/tests/data/frwiki-data.json +++ b/tests/data/frwiki-data.json @@ -1,6 +1,17 @@ { "dateFormat": "j F Y à H:i", - "digits": "0123456789", + "digits": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], "localTimezone": "Europe/Paris", "specialContributionsName": "Contributions", "timezones": { diff --git a/tests/data/huwiki-data.json b/tests/data/huwiki-data.json index 907a5851b..7cb88fa5c 100644 --- a/tests/data/huwiki-data.json +++ b/tests/data/huwiki-data.json @@ -1,6 +1,17 @@ { "dateFormat": "Y. F j., H:i", - "digits": "0123456789", + "digits": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], "localTimezone": "Europe/Berlin", "specialContributionsName": "Szerkesztő_közreműködései", "timezones": { diff --git a/tests/data/nlwiki-data.json b/tests/data/nlwiki-data.json index 34f3a7a98..3dfcf5e2d 100644 --- a/tests/data/nlwiki-data.json +++ b/tests/data/nlwiki-data.json @@ -1,6 +1,17 @@ { "dateFormat": "j M Y H:i", - "digits": "0123456789", + "digits": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], "localTimezone": "Europe/Berlin", "specialContributionsName": "Bijdragen", "timezones": { diff --git a/tests/data/plwiki-data.json b/tests/data/plwiki-data.json index 9adfdd4f3..51f1040cf 100644 --- a/tests/data/plwiki-data.json +++ b/tests/data/plwiki-data.json @@ -1,6 +1,17 @@ { "dateFormat": "H:i, j M Y", - "digits": "0123456789", + "digits": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], "localTimezone": "Europe/Warsaw", "specialContributionsName": "Wkład", "timezones": {