Reduce duplication between PHP parser and data gen for JS parser

Also, make the handling of TranslateNumerals and digitsRegexp the same
between PHP and JS.

Change-Id: I1d81343d0b59ab3ecd59ba1c2ad99a729d983ac4
This commit is contained in:
Bartosz Dziewoński 2020-05-19 20:40:05 +02:00
parent d35facc83f
commit 515af82061
3 changed files with 37 additions and 77 deletions

View file

@ -36,22 +36,17 @@ class CommentParser {
public function __construct( Language $language, Config $config, array $data = [] ) {
$this->language = $language;
$this->config = $config;
$this->dateFormat = $this->language->getDateFormatString(
'both',
$this->language->dateFormat( false )
);
// TODO: We probably shouldn't assume that each digit can be represented by a single BMP
// codepoint in every language (although it seems to be true right now).
$this->digits = $this->config->get( 'TranslateNumerals' ) ?
$this->language->formatNum( '0123456789', true ) :
null;
$this->digitsRegexp = $this->config->get( 'TranslateNumerals' ) ?
'[' . $this->language->formatNum( '0123456789', true ) . ']' :
'\\d';
if ( !$data ) {
// TODO: Instead of passing data used for mocking, mock the methods that fetch the data.
$this->data = $data;
$this->localTimezone = $this->config->get( 'Localtimezone' );
$this->timezoneAbbrs = $this->computeTimezoneAbbrs();
$data = Data::getLocalData( null, $config, $language );
}
$this->dateFormat = $data['dateFormat'];
$this->digits = $data['digits'];
$this->contLangMessages = $data['contLangMessages'];
$this->localTimezone = $data['localTimezone'];
$this->timezones = $data['timezones'];
}
public static function newFromGlobalState() : CommentParser {
@ -61,43 +56,6 @@ class CommentParser {
);
}
/**
 * Work out which timezone abbreviations can show up in timestamps for the local timezone.
 *
 * @return array Associative array mapping localised timezone abbreviations to IANA abbreviations
 *  (the latter upper-cased)
 */
private function computeTimezoneAbbrs() : array {
	// Keep every abbreviation that has at least one entry for the local timezone. Usually there
	// are two (non-DST and DST), sometimes more because of historical data; that is acceptable.
	$localAbbrs = [];
	foreach ( DateTimeZone::listAbbreviations() as $abbr => $entries ) {
		foreach ( $entries as $entry ) {
			if ( $entry['timezone_id'] === $this->localTimezone ) {
				$localAbbrs[] = (string)$abbr;
				break;
			}
		}
	}

	$localisedAbbrs = [];
	$ianaAbbrs = [];
	foreach ( $localAbbrs as $abbr ) {
		// Same message key scheme as in:
		// MWTimestamp::getTimezoneMessage()
		// Parser::pstPass2()
		// Messages used here: 'timezone-utc' and so on
		$key = 'timezone-' . strtolower( trim( $abbr ) );
		$msg = wfMessage( $key )->inLanguage( $this->language );

		// TODO: This probably causes a similar issue to https://phabricator.wikimedia.org/T221294,
		// but we *must* check the message existence in the database, because the messages are not
		// actually defined by MediaWiki core for any timezone other than UTC...
		if ( $msg->exists() ) {
			$localisedAbbrs[] = $this->getMessages( [ $key ] )[0];
		} else {
			// No localised message: the upper-cased abbreviation stands in for itself.
			$localisedAbbrs[] = strtoupper( $abbr );
		}
		$ianaAbbrs[] = strtoupper( $abbr );
	}

	// Pair the two parallel lists; on duplicate localised forms the later entry wins.
	return array_combine( $localisedAbbrs, $ianaAbbrs );
}
/**
* Get a MediaWiki page title from a URL
* @param string $url
@ -187,9 +145,7 @@ class CommentParser {
*/
private function getMessages( array $messageKeys ) : array {
return array_map( function ( string $key ) {
return isset( $this->data['contLangMessages'][$key] ) ?
$this->data['contLangMessages'][$key] :
wfMessage( $key )->inLanguage( $this->language )->text();
return $this->contLangMessages[$key];
}, $messageKeys );
}
@ -502,8 +458,8 @@ class CommentParser {
public function getLocalTimestampRegexp() : string {
return $this->getTimestampRegexp(
$this->dateFormat,
$this->digitsRegexp,
$this->timezoneAbbrs
$this->digits ? "[$this->digits]" : '\\d',
$this->timezones
);
}
@ -520,7 +476,7 @@ class CommentParser {
$this->dateFormat,
$this->digits,
$this->localTimezone,
$this->timezoneAbbrs
$this->timezones
);
}

View file

@ -10,6 +10,7 @@
namespace MediaWiki\Extension\DiscussionTools;
use Config;
use DateTimeZone;
use ExtensionRegistry;
use Language;
use MediaWiki\MediaWikiServices;
@ -24,18 +25,18 @@ class Data {
* We need all of this data *in content language*. Some of it is already available in JS, but only
* in client language, so it's useless for us (e.g. digit transform table, month name messages).
*
* @param ResourceLoaderContext $context
* @param ResourceLoaderContext|null $context
* @param Config $config
* @param string|null $langCode
* @param string|Language|null $lang
* @return array
*/
public static function getLocalData(
ResourceLoaderContext $context, Config $config, string $langCode = null
?ResourceLoaderContext $context, Config $config, $lang = null
) : array {
if ( $langCode ) {
$lang = Language::factory( $langCode );
} else {
if ( !$lang ) {
$lang = MediaWikiServices::getInstance()->getContentLanguage();
} elseif ( !( $lang instanceof Language ) ) {
$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $lang );
}
$data = [];
@ -44,7 +45,9 @@ class Data {
// TODO: We probably shouldn't assume that each digit can be represented by a single BMP
// codepoint in every language (although it seems to be true right now).
$data['digits'] = $lang->formatNum( '0123456789', true );
$data['digits'] = $config->get( 'TranslateNumerals' ) ?
$lang->formatNum( '0123456789', true ) :
null;
// ApiQuerySiteinfo
$data['localTimezone'] = $config->get( 'Localtimezone' );
@ -56,7 +59,7 @@ class Data {
// Return only timezone abbreviations for the local timezone (there will often be two, for
// non-DST and DST timestamps, and sometimes more due to historical data, but that's okay).
$timezoneAbbrs = array_keys( array_filter(
timezone_abbreviations_list(),
DateTimeZone::listAbbreviations(),
function ( array $timezones ) use ( $localTimezone ) {
foreach ( $timezones as $tz ) {
if ( $tz['timezone_id'] === $localTimezone ) {

View file

@ -342,11 +342,11 @@ function getTimestampParser( format, digits, localTimezone, tzAbbrs ) {
* @return {string} Regular expression
*/
function getLocalTimestampRegexp() {
var
df = data.dateFormat,
digitsRegexp = mw.config.get( 'wgTranslateNumerals' ) ? '[' + data.digits + ']' : '\\d',
dfRegexp = getTimestampRegexp( df, digitsRegexp, data.timezones );
return dfRegexp;
return getTimestampRegexp(
data.dateFormat,
data.digits ? '[' + data.digits + ']' : '\\d',
data.timezones
);
}
/**
@ -361,11 +361,12 @@ function getLocalTimestampRegexp() {
* @return {Date} return.return
*/
function getLocalTimestampParser() {
var
df = data.dateFormat,
digits = mw.config.get( 'wgTranslateNumerals' ) ? data.digits : null,
parseFunction = getTimestampParser( df, digits, data.localTimezone, data.timezones );
return parseFunction;
return getTimestampParser(
data.dateFormat,
data.digits,
data.localTimezone,
data.timezones
);
}
/**