mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/DiscussionTools
synced 2024-11-27 17:51:09 +00:00
Fix parsing localised digits in PHP discussion parser
The PHP code incorrectly assumed that each localised digit is a single byte in UTF-8, which is never the case (except for the ASCII digits 0-9). The JS code worked correctly because it uses UTF-16 strings, so the bug would only affect non-BMP digits there. This was noted in a TODO comment, but we overlooked it when reimplementing the parser in PHP. Instead of a string of 10 characters, use an array of 10 single-character strings. Bug: T261706 Change-Id: Ic5421382474c88f003424799c53ff473d99cce92
This commit is contained in:
parent
240c766768
commit
2d3fe47ac1
|
@ -270,23 +270,23 @@ class CommentParser {
|
||||||
* of matching the regexp returned by getTimestampRegexp()
|
* of matching the regexp returned by getTimestampRegexp()
|
||||||
*
|
*
|
||||||
* @param string $format Date format, as used by MediaWiki
|
* @param string $format Date format, as used by MediaWiki
|
||||||
* @param string|null $digits Localised digits from 0 to 9, e.g. `0123456789`
|
* @param string[]|null $digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
|
||||||
* @param string $localTimezone Local timezone IANA name, e.g. `America/New_York`
|
* @param string $localTimezone Local timezone IANA name, e.g. `America/New_York`
|
||||||
* @param array $tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
|
* @param array $tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
|
||||||
* for the local timezone, e.g. [ 'EDT' => 'EDT', 'EST' => 'EST' ]
|
* for the local timezone, e.g. [ 'EDT' => 'EDT', 'EST' => 'EST' ]
|
||||||
* @return callable Parser function
|
* @return callable Parser function
|
||||||
*/
|
*/
|
||||||
private function getTimestampParser(
|
private function getTimestampParser(
|
||||||
string $format, ?string $digits, string $localTimezone, array $tzAbbrs
|
string $format, ?array $digits, string $localTimezone, array $tzAbbrs
|
||||||
) : callable {
|
) : callable {
|
||||||
$untransformDigits = function ( string $text ) use ( $digits ) {
|
$untransformDigits = function ( string $text ) use ( $digits ) {
|
||||||
if ( !$digits ) {
|
if ( !$digits ) {
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
return preg_replace_callback(
|
return preg_replace_callback(
|
||||||
'/[' . $digits . ']/',
|
'/[' . implode( '', $digits ) . ']/u',
|
||||||
function ( array $m ) use ( $digits ) {
|
function ( array $m ) use ( $digits ) {
|
||||||
return (string)strpos( $digits, $m[0] );
|
return (string)array_search( $m[0], $digits );
|
||||||
},
|
},
|
||||||
$text
|
$text
|
||||||
);
|
);
|
||||||
|
@ -453,7 +453,7 @@ class CommentParser {
|
||||||
public function getLocalTimestampRegexp() : string {
|
public function getLocalTimestampRegexp() : string {
|
||||||
return $this->getTimestampRegexp(
|
return $this->getTimestampRegexp(
|
||||||
$this->dateFormat,
|
$this->dateFormat,
|
||||||
$this->digits ? "[$this->digits]" : '\\d',
|
$this->digits ? '[' . implode( '', $this->digits ) . ']' : '\\d',
|
||||||
$this->timezones
|
$this->timezones
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,10 +43,8 @@ class Data {
|
||||||
|
|
||||||
$data['dateFormat'] = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) );
|
$data['dateFormat'] = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) );
|
||||||
|
|
||||||
// TODO: We probably shouldn't assume that each digit can be represented by a single BMP
|
|
||||||
// codepoint in every language (although it seems to be true right now).
|
|
||||||
$data['digits'] = $config->get( 'TranslateNumerals' ) ?
|
$data['digits'] = $config->get( 'TranslateNumerals' ) ?
|
||||||
$lang->formatNum( '0123456789', true ) :
|
preg_split( '//u', $lang->formatNum( '0123456789', true ), -1, PREG_SPLIT_NO_EMPTY ) :
|
||||||
null;
|
null;
|
||||||
|
|
||||||
// ApiQuerySiteinfo
|
// ApiQuerySiteinfo
|
||||||
|
|
|
@ -186,7 +186,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
|
||||||
*
|
*
|
||||||
* @private
|
* @private
|
||||||
* @param {string} format Date format, as used by MediaWiki
|
* @param {string} format Date format, as used by MediaWiki
|
||||||
* @param {string|null} digits Localised digits from 0 to 9, e.g. `0123456789`
|
* @param {string[]|null} digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
|
||||||
* @param {string} localTimezone Local timezone IANA name, e.g. `America/New_York`
|
* @param {string} localTimezone Local timezone IANA name, e.g. `America/New_York`
|
||||||
* @param {Object} tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
|
* @param {Object} tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
|
||||||
* for the local timezone, e.g. `{EDT: "EDT", EST: "EST"}`
|
* for the local timezone, e.g. `{EDT: "EDT", EST: "EST"}`
|
||||||
|
@ -246,7 +246,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
return text.replace(
|
return text.replace(
|
||||||
new RegExp( '[' + digits + ']', 'g' ),
|
new RegExp( '[' + digits.join( '' ) + ']', 'g' ),
|
||||||
function ( m ) {
|
function ( m ) {
|
||||||
return digits.indexOf( m );
|
return digits.indexOf( m );
|
||||||
}
|
}
|
||||||
|
@ -371,7 +371,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
|
||||||
Parser.prototype.getLocalTimestampRegexp = function () {
|
Parser.prototype.getLocalTimestampRegexp = function () {
|
||||||
return this.getTimestampRegexp(
|
return this.getTimestampRegexp(
|
||||||
data.dateFormat,
|
data.dateFormat,
|
||||||
data.digits ? '[' + data.digits + ']' : '\\d',
|
data.digits ? '[' + data.digits.join( '' ) + ']' : '\\d',
|
||||||
data.timezones
|
data.timezones
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
{
|
{
|
||||||
"dateFormat": "H:i، j xg Y",
|
"dateFormat": "H:i، j xg Y",
|
||||||
"digits": "0123456789",
|
"digits": [
|
||||||
|
"0",
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5",
|
||||||
|
"6",
|
||||||
|
"7",
|
||||||
|
"8",
|
||||||
|
"9"
|
||||||
|
],
|
||||||
"localTimezone": "UTC",
|
"localTimezone": "UTC",
|
||||||
"specialContributionsName": "مساهمات",
|
"specialContributionsName": "مساهمات",
|
||||||
"timezones": {
|
"timezones": {
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
{
|
{
|
||||||
"dateFormat": "H:i, j F Y",
|
"dateFormat": "H:i, j F Y",
|
||||||
"digits": "0123456789",
|
"digits": [
|
||||||
|
"0",
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5",
|
||||||
|
"6",
|
||||||
|
"7",
|
||||||
|
"8",
|
||||||
|
"9"
|
||||||
|
],
|
||||||
"localTimezone": "UTC",
|
"localTimezone": "UTC",
|
||||||
"specialContributionsName": "Contributions",
|
"specialContributionsName": "Contributions",
|
||||||
"timezones": {
|
"timezones": {
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
{
|
{
|
||||||
"dateFormat": "j F Y à H:i",
|
"dateFormat": "j F Y à H:i",
|
||||||
"digits": "0123456789",
|
"digits": [
|
||||||
|
"0",
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5",
|
||||||
|
"6",
|
||||||
|
"7",
|
||||||
|
"8",
|
||||||
|
"9"
|
||||||
|
],
|
||||||
"localTimezone": "Europe/Paris",
|
"localTimezone": "Europe/Paris",
|
||||||
"specialContributionsName": "Contributions",
|
"specialContributionsName": "Contributions",
|
||||||
"timezones": {
|
"timezones": {
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
{
|
{
|
||||||
"dateFormat": "Y. F j., H:i",
|
"dateFormat": "Y. F j., H:i",
|
||||||
"digits": "0123456789",
|
"digits": [
|
||||||
|
"0",
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5",
|
||||||
|
"6",
|
||||||
|
"7",
|
||||||
|
"8",
|
||||||
|
"9"
|
||||||
|
],
|
||||||
"localTimezone": "Europe/Berlin",
|
"localTimezone": "Europe/Berlin",
|
||||||
"specialContributionsName": "Szerkesztő_közreműködései",
|
"specialContributionsName": "Szerkesztő_közreműködései",
|
||||||
"timezones": {
|
"timezones": {
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
{
|
{
|
||||||
"dateFormat": "j M Y H:i",
|
"dateFormat": "j M Y H:i",
|
||||||
"digits": "0123456789",
|
"digits": [
|
||||||
|
"0",
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5",
|
||||||
|
"6",
|
||||||
|
"7",
|
||||||
|
"8",
|
||||||
|
"9"
|
||||||
|
],
|
||||||
"localTimezone": "Europe/Berlin",
|
"localTimezone": "Europe/Berlin",
|
||||||
"specialContributionsName": "Bijdragen",
|
"specialContributionsName": "Bijdragen",
|
||||||
"timezones": {
|
"timezones": {
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
{
|
{
|
||||||
"dateFormat": "H:i, j M Y",
|
"dateFormat": "H:i, j M Y",
|
||||||
"digits": "0123456789",
|
"digits": [
|
||||||
|
"0",
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"3",
|
||||||
|
"4",
|
||||||
|
"5",
|
||||||
|
"6",
|
||||||
|
"7",
|
||||||
|
"8",
|
||||||
|
"9"
|
||||||
|
],
|
||||||
"localTimezone": "Europe/Warsaw",
|
"localTimezone": "Europe/Warsaw",
|
||||||
"specialContributionsName": "Wkład",
|
"specialContributionsName": "Wkład",
|
||||||
"timezones": {
|
"timezones": {
|
||||||
|
|
Loading…
Reference in a new issue