mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/DiscussionTools
synced 2024-11-15 20:10:02 +00:00
4613ae78e7
Goal:
-----
To have a method like CommentParser::parse(), which just takes a node to parse and a title and returns plain data, so that we don't need to keep track of the config to construct a CommentParser object (the required config like content language is provided by services) and we don't need to keep that object around after parsing.

Changes:
--------
CommentParser.php:
* …is now a service. Constructor only takes services as arguments. The node and title are passed to a new parse() method.
* parse() should return plain data, but I split this part to a separate patch for ease of review: I49bfe019aa460651447fd383f73eafa9d7180a92.
* CommentParser still cheats and accesses global state in a few places, e.g. calling Title::makeTitleSafe or CommentUtils::getTitleFromUrl, so we can't turn its tests into true unit tests. This work is left for future commits.

LanguageData.php:
* …is now a service, instead of a static class.

Parser.js:
* …is not a real service, but it's changed to behave in a similar way. Constructor takes only the required config as argument, and node and title are instead passed to a new parse() method.

CommentParserTest.php:
parser.test.js:
* Can be simplified, now that we don't need a useless node and title to test internal methods that don't use them.

testUtils.js:
* Can be simplified, now that we don't need to override internal ResourceLoader stuff just to change the parser config.

Change-Id: Iadb7757debe000025e52770ca51ebcf24ca8ee66
232 lines
7 KiB
PHP
232 lines
7 KiB
PHP
<?php
|
|
/**
 * Generates language-specific data used by DiscussionTools.
 *
 * @file
 * @ingroup Extensions
 * @license MIT
 */
|
|
|
|
namespace MediaWiki\Extension\DiscussionTools;
|
|
|
|
use Config;
|
|
use DateTimeZone;
|
|
use ILanguageConverter;
|
|
use Language;
|
|
use MediaWiki\Languages\LanguageConverterFactory;
|
|
use MediaWiki\SpecialPage\SpecialPageFactory;
|
|
|
|
/**
 * Service that computes the language-dependent data DiscussionTools needs in order to
 * recognise timestamps and signatures: date formats, digits, timezone abbreviations and
 * day/month names, each for every variant of the configured language.
 */
class LanguageData {
	/** @var Config Read for the 'TranslateNumerals' and 'Localtimezone' settings */
	private $config;

	/** @var Language Language whose date format, digits and messages are exported */
	private $language;

	/** @var LanguageConverterFactory Used to obtain the variant converter for $language */
	private $languageConverterFactory;

	/** @var SpecialPageFactory Used to look up localised special page names */
	private $specialPageFactory;

	/**
	 * @param Config $config
	 * @param Language $language
	 * @param LanguageConverterFactory $languageConverterFactory
	 * @param SpecialPageFactory $specialPageFactory
	 */
	public function __construct(
		Config $config,
		Language $language,
		LanguageConverterFactory $languageConverterFactory,
		SpecialPageFactory $specialPageFactory
	) {
		$this->config = $config;
		$this->language = $language;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->specialPageFactory = $specialPageFactory;
	}

	/**
	 * Compute data we need to parse discussion threads on pages.
	 *
	 * The returned array has the following keys, in this order:
	 *  - 'dateFormat': map of variant code => date format string for that variant
	 *  - 'digits': map of variant code => list of the ten digits 0-9 as displayed in that variant
	 *  - 'localTimezone': value of the 'Localtimezone' setting
	 *  - 'specialContributionsName': local name of Special:Contributions
	 *  - 'specialNewSectionName': local name of Special:NewSection
	 *  - 'timezones': map of variant code => map of displayed timezone abbreviation =>
	 *    upper-cased PHP timezone abbreviation
	 *  - 'contLangMessages': map of variant code => map of message key => message text
	 *    (weekday and month names, including abbreviated and genitive forms)
	 *  - 'signatureScanLimit': integer, see the comment in the method body
	 *
	 * @return array
	 */
	public function getLocalData(): array {
		$config = $this->config;
		$lang = $this->language;
		$langConv = $this->languageConverterFactory->getLanguageConverter( $lang );

		// Everything that does not depend on the variant is computed once, up front, so that
		// the per-variant loop at the end only has to apply the language converter.

		$dateFormat = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) );

		$translateNumerals = $config->get( 'TranslateNumerals' );
		$localDigits = [];
		foreach ( str_split( '0123456789' ) as $digit ) {
			$localDigits[] = $translateNumerals ? $lang->formatNumNoSeparators( $digit ) : $digit;
		}

		// ApiQuerySiteinfo
		$localTimezone = $config->get( 'Localtimezone' );

		// Return all timezone abbreviations for the local timezone (there will often be two, for
		// non-DST and DST timestamps, and sometimes more due to historical data, but that's okay).
		// Avoid DateTimeZone::listAbbreviations(), it returns some half-baked list that is different
		// from the timezone data used by everything else in PHP.
		$timezoneAbbrs = array_values( array_unique(
			array_map( static function ( $transition ) {
				return $transition['abbr'];
			}, ( new DateTimeZone( $localTimezone ) )->getTransitions() )
		) );

		// Displayed text for each abbreviation, before variant conversion
		$timezoneTexts = [];
		foreach ( $timezoneAbbrs as $tzMsg ) {
			// MWTimestamp::getTimezoneMessage()
			// Parser::pstPass2()
			// Messages used here: 'timezone-utc' and so on
			$key = 'timezone-' . strtolower( trim( $tzMsg ) );
			$msg = wfMessage( $key )->inLanguage( $lang );
			// TODO: This probably causes a similar issue to https://phabricator.wikimedia.org/T221294,
			// but we *must* check the message existence in the database, because the messages are not
			// actually defined by MediaWiki core for any timezone other than UTC...
			$timezoneTexts[] = $msg->exists() ? $msg->text() : strtoupper( $tzMsg );
		}
		$timezoneCodes = array_map( 'strtoupper', $timezoneAbbrs );

		// Messages in content language
		$messagesKeys = array_merge(
			Language::WEEKDAY_MESSAGES,
			Language::WEEKDAY_ABBREVIATED_MESSAGES,
			Language::MONTH_MESSAGES,
			Language::MONTH_GENITIVE_MESSAGES,
			Language::MONTH_ABBREVIATED_MESSAGES
		);
		$messageTexts = [];
		foreach ( $messagesKeys as $key ) {
			$messageTexts[] = wfMessage( $key )->inLanguage( $lang )->text();
		}

		// Keys are listed here in the order in which they must appear in the result.
		$data = [
			'dateFormat' => [],
			'digits' => [],
			'localTimezone' => $localTimezone,
			'specialContributionsName' => $this->specialPageFactory->getLocalNameFor( 'Contributions' ),
			'specialNewSectionName' => $this->specialPageFactory->getLocalNameFor( 'NewSection' ),
			'timezones' => [],
			'contLangMessages' => [],
			// How far backwards we look for a signature associated with a timestamp before giving up.
			// Note that this is not a hard limit on the length of signatures we detect.
			'signatureScanLimit' => 100,
		];

		foreach ( $langConv->getVariants() as $variant ) {
			$convert = static function ( $text ) use ( $langConv, $variant ) {
				return $langConv->translate( $text, $variant );
			};

			$data['dateFormat'][$variant] = $this->convertDateFormat( $dateFormat, $langConv, $variant );
			$data['digits'][$variant] = array_map( $convert, $localDigits );
			$data['timezones'][$variant] = array_combine(
				array_map( $convert, $timezoneTexts ),
				$timezoneCodes
			);
			$data['contLangMessages'][$variant] = array_combine(
				$messagesKeys,
				array_map( $convert, $messageTexts )
			);
		}

		return $data;
	}

	/**
	 * Convert a date format string to a different language variant, leaving all special characters
	 * unchanged and applying language conversion to the plain text fragments.
	 *
	 * Plain text outside of quotes is converted one whole UTF-8 character at a time (never one
	 * byte at a time), so that the converter always receives well-formed text.
	 *
	 * @param string $format Date format string, using the codes listed in the switch below
	 * @param ILanguageConverter $langConv
	 * @param string $variant Variant code to convert the plain text to
	 * @return string
	 */
	private function convertDateFormat(
		string $format,
		ILanguageConverter $langConv,
		string $variant
	): string {
		$formatLength = strlen( $format );
		$s = '';
		// The supported codes must match CommentParser::getTimestampRegexp()
		for ( $p = 0; $p < $formatLength; $p++ ) {
			// Codes are one byte long, except for 'x' + one byte and 'xk' + one byte
			$code = $format[$p];
			if ( $code === 'x' && $p < $formatLength - 1 ) {
				$code .= $format[++$p];
			}
			if ( $code === 'xk' && $p < $formatLength - 1 ) {
				$code .= $format[++$p];
			}

			// LAZY SHORTCUTS that might cause bugs:
			// * We assume that result of $langConv->translate() doesn't produce any special codes/characters
			// * We assume that calling $langConv->translate() separately for each character is correct
			switch ( $code ) {
				case 'xx':
				case 'xg':
				case 'd':
				case 'D':
				case 'j':
				case 'l':
				case 'F':
				case 'M':
				case 'n':
				case 'Y':
				case 'xkY':
				case 'G':
				case 'H':
				case 'i':
					// Special code - pass through unchanged
					$s .= $code;
					break;
				case '\\':
					// Plain text (backslash escaping) - convert to language variant
					if ( $p < $formatLength - 1 ) {
						// The escaped character may be longer than one byte; consume all of it
						$char = self::getCharacterAt( $format, $p + 1 );
						$s .= '\\' . $langConv->translate( $char, $variant );
						$p += strlen( $char );
					} else {
						$s .= $code;
					}
					break;
				case '"':
					// Plain text (quoted literal) - convert to language variant
					if ( $p < $formatLength - 1 ) {
						$endQuote = strpos( $format, '"', $p + 1 );
						if ( $endQuote === false ) {
							// No terminating quote, assume literal "
							$s .= $code;
						} else {
							$s .= '"' .
								$langConv->translate( substr( $format, $p + 1, $endQuote - $p - 1 ), $variant ) .
								'"';
							$p = $endQuote;
						}
					} else {
						// Quote at end of string, assume literal "
						$s .= $code;
					}
					break;
				default:
					// Plain text - convert to language variant.
					// For an unrecognised multi-byte code (e.g. 'xz'), $p already points at its
					// last byte, so only the text starting there is kept and the 'x'/'xk' prefix
					// is dropped, exactly as before.
					$char = self::getCharacterAt( $format, $p );
					$s .= $langConv->translate( $char, $variant );
					// The loop's own $p++ accounts for the first byte
					$p += strlen( $char ) - 1;
			}
		}

		return $s;
	}

	/**
	 * Get the complete UTF-8 character that starts at the given byte offset.
	 *
	 * If the byte at the offset is not a UTF-8 lead byte, or the sequence is cut short, only the
	 * bytes that are actually present and belong to the sequence are returned (always at least
	 * one byte), so callers can rely on making progress through the string.
	 *
	 * @param string $text
	 * @param int $offset Byte offset, must be less than the length of $text
	 * @return string
	 */
	private static function getCharacterAt( string $text, int $offset ): string {
		$lead = ord( $text[$offset] );
		if ( $lead < 0xC0 ) {
			// ASCII, or a stray continuation byte
			return $text[$offset];
		}
		// Sequence length announced by the lead byte: 110xxxxx, 1110xxxx, 11110xxx
		$expected = $lead >= 0xF0 ? 4 : ( $lead >= 0xE0 ? 3 : 2 );
		$length = 1;
		while (
			$length < $expected &&
			isset( $text[$offset + $length] ) &&
			// Continuation bytes look like 10xxxxxx
			( ord( $text[$offset + $length] ) & 0xC0 ) === 0x80
		) {
			$length++;
		}
		return substr( $text, $offset, $length );
	}
}
|