mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/DiscussionTools
synced 2024-11-27 17:51:09 +00:00
Fix parsing localised digits in PHP discussion parser
The PHP code incorrectly assumed that the digits are single-byte in UTF-8, which is never the case (except for 0-9). The JS code worked correctly because it uses UTF-16 strings, so the bug would only affect non-BMP digits there. This was noted in a TODO comment, but we overlooked it when reimplementing in PHP. Instead of a string of 10 characters, use an array of 10 single-character strings. Bug: T261706 Change-Id: Ic5421382474c88f003424799c53ff473d99cce92
This commit is contained in:
parent
240c766768
commit
2d3fe47ac1
|
@ -270,23 +270,23 @@ class CommentParser {
|
|||
* of matching the regexp returned by getTimestampRegexp()
|
||||
*
|
||||
* @param string $format Date format, as used by MediaWiki
|
||||
* @param string|null $digits Localised digits from 0 to 9, e.g. `0123456789`
|
||||
* @param string[]|null $digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
|
||||
* @param string $localTimezone Local timezone IANA name, e.g. `America/New_York`
|
||||
* @param array $tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
|
||||
* for the local timezone, e.g. [ 'EDT' => 'EDT', 'EST' => 'EST' ]
|
||||
* @return callable Parser function
|
||||
*/
|
||||
private function getTimestampParser(
|
||||
string $format, ?string $digits, string $localTimezone, array $tzAbbrs
|
||||
string $format, ?array $digits, string $localTimezone, array $tzAbbrs
|
||||
) : callable {
|
||||
$untransformDigits = function ( string $text ) use ( $digits ) {
|
||||
if ( !$digits ) {
|
||||
return $text;
|
||||
}
|
||||
return preg_replace_callback(
|
||||
'/[' . $digits . ']/',
|
||||
'/[' . implode( '', $digits ) . ']/u',
|
||||
function ( array $m ) use ( $digits ) {
|
||||
return (string)strpos( $digits, $m[0] );
|
||||
return (string)array_search( $m[0], $digits );
|
||||
},
|
||||
$text
|
||||
);
|
||||
|
@ -453,7 +453,7 @@ class CommentParser {
|
|||
public function getLocalTimestampRegexp() : string {
|
||||
return $this->getTimestampRegexp(
|
||||
$this->dateFormat,
|
||||
$this->digits ? "[$this->digits]" : '\\d',
|
||||
$this->digits ? '[' . implode( '', $this->digits ) . ']' : '\\d',
|
||||
$this->timezones
|
||||
);
|
||||
}
|
||||
|
|
|
@ -43,10 +43,8 @@ class Data {
|
|||
|
||||
$data['dateFormat'] = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) );
|
||||
|
||||
// TODO: We probably shouldn't assume that each digit can be represented by a single BMP
|
||||
// codepoint in every language (although it seems to be true right now).
|
||||
$data['digits'] = $config->get( 'TranslateNumerals' ) ?
|
||||
$lang->formatNum( '0123456789', true ) :
|
||||
preg_split( '//u', $lang->formatNum( '0123456789', true ), -1, PREG_SPLIT_NO_EMPTY ) :
|
||||
null;
|
||||
|
||||
// ApiQuerySiteinfo
|
||||
|
|
|
@ -186,7 +186,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
|
|||
*
|
||||
* @private
|
||||
* @param {string} format Date format, as used by MediaWiki
|
||||
* @param {string|null} digits Localised digits from 0 to 9, e.g. `0123456789`
|
||||
* @param {string[]|null} digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
|
||||
* @param {string} localTimezone Local timezone IANA name, e.g. `America/New_York`
|
||||
* @param {Object} tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
|
||||
* for the local timezone, e.g. `{EDT: "EDT", EST: "EST"}`
|
||||
|
@ -246,7 +246,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
|
|||
return text;
|
||||
}
|
||||
return text.replace(
|
||||
new RegExp( '[' + digits + ']', 'g' ),
|
||||
new RegExp( '[' + digits.join( '' ) + ']', 'g' ),
|
||||
function ( m ) {
|
||||
return digits.indexOf( m );
|
||||
}
|
||||
|
@ -371,7 +371,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
|
|||
Parser.prototype.getLocalTimestampRegexp = function () {
|
||||
return this.getTimestampRegexp(
|
||||
data.dateFormat,
|
||||
data.digits ? '[' + data.digits + ']' : '\\d',
|
||||
data.digits ? '[' + data.digits.join( '' ) + ']' : '\\d',
|
||||
data.timezones
|
||||
);
|
||||
};
|
||||
|
|
|
@ -1,6 +1,17 @@
|
|||
{
|
||||
"dateFormat": "H:i، j xg Y",
|
||||
"digits": "0123456789",
|
||||
"digits": [
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9"
|
||||
],
|
||||
"localTimezone": "UTC",
|
||||
"specialContributionsName": "مساهمات",
|
||||
"timezones": {
|
||||
|
|
|
@ -1,6 +1,17 @@
|
|||
{
|
||||
"dateFormat": "H:i, j F Y",
|
||||
"digits": "0123456789",
|
||||
"digits": [
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9"
|
||||
],
|
||||
"localTimezone": "UTC",
|
||||
"specialContributionsName": "Contributions",
|
||||
"timezones": {
|
||||
|
|
|
@ -1,6 +1,17 @@
|
|||
{
|
||||
"dateFormat": "j F Y à H:i",
|
||||
"digits": "0123456789",
|
||||
"digits": [
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9"
|
||||
],
|
||||
"localTimezone": "Europe/Paris",
|
||||
"specialContributionsName": "Contributions",
|
||||
"timezones": {
|
||||
|
|
|
@ -1,6 +1,17 @@
|
|||
{
|
||||
"dateFormat": "Y. F j., H:i",
|
||||
"digits": "0123456789",
|
||||
"digits": [
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9"
|
||||
],
|
||||
"localTimezone": "Europe/Berlin",
|
||||
"specialContributionsName": "Szerkesztő_közreműködései",
|
||||
"timezones": {
|
||||
|
|
|
@ -1,6 +1,17 @@
|
|||
{
|
||||
"dateFormat": "j M Y H:i",
|
||||
"digits": "0123456789",
|
||||
"digits": [
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9"
|
||||
],
|
||||
"localTimezone": "Europe/Berlin",
|
||||
"specialContributionsName": "Bijdragen",
|
||||
"timezones": {
|
||||
|
|
|
@ -1,6 +1,17 @@
|
|||
{
|
||||
"dateFormat": "H:i, j M Y",
|
||||
"digits": "0123456789",
|
||||
"digits": [
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9"
|
||||
],
|
||||
"localTimezone": "Europe/Warsaw",
|
||||
"specialContributionsName": "Wkład",
|
||||
"timezones": {
|
||||
|
|
Loading…
Reference in a new issue