Reduce duplication between the PHP parser and the data generation for the JS parser

Also, make the handling of TranslateNumerals and digitsRegexp the same
between PHP and JS.

Change-Id: I1d81343d0b59ab3ecd59ba1c2ad99a729d983ac4
This commit is contained in:
Bartosz Dziewoński 2020-05-19 20:40:05 +02:00
parent d35facc83f
commit 515af82061
3 changed files with 37 additions and 77 deletions

View file

@ -36,22 +36,17 @@ class CommentParser {
public function __construct( Language $language, Config $config, array $data = [] ) { public function __construct( Language $language, Config $config, array $data = [] ) {
$this->language = $language; $this->language = $language;
$this->config = $config; $this->config = $config;
$this->dateFormat = $this->language->getDateFormatString(
'both', if ( !$data ) {
$this->language->dateFormat( false )
);
// TODO: We probably shouldn't assume that each digit can be represented by a single BMP
// codepoint in every language (although it seems to be true right now).
$this->digits = $this->config->get( 'TranslateNumerals' ) ?
$this->language->formatNum( '0123456789', true ) :
null;
$this->digitsRegexp = $this->config->get( 'TranslateNumerals' ) ?
'[' . $this->language->formatNum( '0123456789', true ) . ']' :
'\\d';
// TODO: Instead of passing data used for mocking, mock the methods that fetch the data. // TODO: Instead of passing data used for mocking, mock the methods that fetch the data.
$this->data = $data; $data = Data::getLocalData( null, $config, $language );
$this->localTimezone = $this->config->get( 'Localtimezone' ); }
$this->timezoneAbbrs = $this->computeTimezoneAbbrs();
$this->dateFormat = $data['dateFormat'];
$this->digits = $data['digits'];
$this->contLangMessages = $data['contLangMessages'];
$this->localTimezone = $data['localTimezone'];
$this->timezones = $data['timezones'];
} }
public static function newFromGlobalState() : CommentParser { public static function newFromGlobalState() : CommentParser {
@ -61,43 +56,6 @@ class CommentParser {
); );
} }
/**
* Build the timezone abbreviations map for the local timezone.
* @return array Associative array mapping localised timezone abbreviations to IANA abbreviations
*/
private function computeTimezoneAbbrs() : array {
// Return only timezone abbreviations for the local timezone (there will often be two, for
// non-DST and DST timestamps, and sometimes more due to historical data, but that's okay).
$timezoneAbbrs = array_keys( array_filter(
DateTimeZone::listAbbreviations(),
function ( array $timezones ) {
foreach ( $timezones as $tz ) {
if ( $tz['timezone_id'] === $this->localTimezone ) {
return true;
}
}
return false;
}
) );
return array_combine(
array_map( function ( string $tzMsg ) {
// MWTimestamp::getTimezoneMessage()
// Parser::pstPass2()
// Messages used here: 'timezone-utc' and so on
$key = 'timezone-' . strtolower( trim( $tzMsg ) );
$msg = wfMessage( $key )->inLanguage( $this->language );
// TODO: This probably causes a similar issue to https://phabricator.wikimedia.org/T221294,
// but we *must* check the message existence in the database, because the messages are not
// actually defined by MediaWiki core for any timezone other than UTC...
if ( $msg->exists() ) {
return $this->getMessages( [ $key ] )[0];
}
return strtoupper( $tzMsg );
}, $timezoneAbbrs ),
array_map( 'strtoupper', $timezoneAbbrs )
);
}
/** /**
* Get a MediaWiki page title from a URL * Get a MediaWiki page title from a URL
* @param string $url * @param string $url
@ -187,9 +145,7 @@ class CommentParser {
*/ */
private function getMessages( array $messageKeys ) : array { private function getMessages( array $messageKeys ) : array {
return array_map( function ( string $key ) { return array_map( function ( string $key ) {
return isset( $this->data['contLangMessages'][$key] ) ? return $this->contLangMessages[$key];
$this->data['contLangMessages'][$key] :
wfMessage( $key )->inLanguage( $this->language )->text();
}, $messageKeys ); }, $messageKeys );
} }
@ -502,8 +458,8 @@ class CommentParser {
public function getLocalTimestampRegexp() : string { public function getLocalTimestampRegexp() : string {
return $this->getTimestampRegexp( return $this->getTimestampRegexp(
$this->dateFormat, $this->dateFormat,
$this->digitsRegexp, $this->digits ? "[$this->digits]" : '\\d',
$this->timezoneAbbrs $this->timezones
); );
} }
@ -520,7 +476,7 @@ class CommentParser {
$this->dateFormat, $this->dateFormat,
$this->digits, $this->digits,
$this->localTimezone, $this->localTimezone,
$this->timezoneAbbrs $this->timezones
); );
} }

View file

@ -10,6 +10,7 @@
namespace MediaWiki\Extension\DiscussionTools; namespace MediaWiki\Extension\DiscussionTools;
use Config; use Config;
use DateTimeZone;
use ExtensionRegistry; use ExtensionRegistry;
use Language; use Language;
use MediaWiki\MediaWikiServices; use MediaWiki\MediaWikiServices;
@ -24,18 +25,18 @@ class Data {
* We need all of this data *in content language*. Some of it is already available in JS, but only * We need all of this data *in content language*. Some of it is already available in JS, but only
* in client language, so it's useless for us (e.g. digit transform table, month name messages). * in client language, so it's useless for us (e.g. digit transform table, month name messages).
* *
* @param ResourceLoaderContext $context * @param ResourceLoaderContext|null $context
* @param Config $config * @param Config $config
* @param string|null $langCode * @param string|Language|null $lang
* @return array * @return array
*/ */
public static function getLocalData( public static function getLocalData(
ResourceLoaderContext $context, Config $config, string $langCode = null ?ResourceLoaderContext $context, Config $config, $lang = null
) : array { ) : array {
if ( $langCode ) { if ( !$lang ) {
$lang = Language::factory( $langCode );
} else {
$lang = MediaWikiServices::getInstance()->getContentLanguage(); $lang = MediaWikiServices::getInstance()->getContentLanguage();
} elseif ( !( $lang instanceof Language ) ) {
$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $lang );
} }
$data = []; $data = [];
@ -44,7 +45,9 @@ class Data {
// TODO: We probably shouldn't assume that each digit can be represented by a single BMP // TODO: We probably shouldn't assume that each digit can be represented by a single BMP
// codepoint in every language (although it seems to be true right now). // codepoint in every language (although it seems to be true right now).
$data['digits'] = $lang->formatNum( '0123456789', true ); $data['digits'] = $config->get( 'TranslateNumerals' ) ?
$lang->formatNum( '0123456789', true ) :
null;
// ApiQuerySiteinfo // ApiQuerySiteinfo
$data['localTimezone'] = $config->get( 'Localtimezone' ); $data['localTimezone'] = $config->get( 'Localtimezone' );
@ -56,7 +59,7 @@ class Data {
// Return only timezone abbreviations for the local timezone (there will often be two, for // Return only timezone abbreviations for the local timezone (there will often be two, for
// non-DST and DST timestamps, and sometimes more due to historical data, but that's okay). // non-DST and DST timestamps, and sometimes more due to historical data, but that's okay).
$timezoneAbbrs = array_keys( array_filter( $timezoneAbbrs = array_keys( array_filter(
timezone_abbreviations_list(), DateTimeZone::listAbbreviations(),
function ( array $timezones ) use ( $localTimezone ) { function ( array $timezones ) use ( $localTimezone ) {
foreach ( $timezones as $tz ) { foreach ( $timezones as $tz ) {
if ( $tz['timezone_id'] === $localTimezone ) { if ( $tz['timezone_id'] === $localTimezone ) {

View file

@ -342,11 +342,11 @@ function getTimestampParser( format, digits, localTimezone, tzAbbrs ) {
* @return {string} Regular expression * @return {string} Regular expression
*/ */
function getLocalTimestampRegexp() { function getLocalTimestampRegexp() {
var return getTimestampRegexp(
df = data.dateFormat, data.dateFormat,
digitsRegexp = mw.config.get( 'wgTranslateNumerals' ) ? '[' + data.digits + ']' : '\\d', data.digits ? '[' + data.digits + ']' : '\\d',
dfRegexp = getTimestampRegexp( df, digitsRegexp, data.timezones ); data.timezones
return dfRegexp; );
} }
/** /**
@ -361,11 +361,12 @@ function getLocalTimestampRegexp() {
* @return {Date} return.return * @return {Date} return.return
*/ */
function getLocalTimestampParser() { function getLocalTimestampParser() {
var return getTimestampParser(
df = data.dateFormat, data.dateFormat,
digits = mw.config.get( 'wgTranslateNumerals' ) ? data.digits : null, data.digits,
parseFunction = getTimestampParser( df, digits, data.localTimezone, data.timezones ); data.localTimezone,
return parseFunction; data.timezones
);
} }
/** /**