Merge "Parsing discussions converted to language variants"

2024-09-24 10:58:20 +00:00 · 2020-09-16 22:52:37 +00:00 · 2020-09-16 22:52:37 +00:00 · 328674d042
parent 53cb5a3b19 329df8c953
commit 328674d042
12 changed files with 816 additions and 596 deletions
--- a/includes/CommentParser.php
+++ b/includes/CommentParser.php
@ -37,9 +37,12 @@ class CommentParser {
 	/** @var Config */
 	private $config;
 	/** @var Language */
 	private $language;
 	private $dateFormat;
 	private $digits;
-	/** @var string[] */
+	/** @var string[][] */
 	private $contLangMessages;
 	private $localTimezone;
 	private $timezones;
@ -53,6 +56,7 @@ class CommentParser {
 	public function __construct( DOMElement $rootNode, Language $language, Config $config, array $data = [] ) {
 		$this->rootNode = $rootNode;
 		$this->config = $config;
 		$this->language = $language;
 		if ( !$data ) {
 			// TODO: Instead of passing data used for mocking, mock the methods that fetch the data.
@ -133,13 +137,16 @@ class CommentParser {
 	}
 	/**
-	 * @param string[] $messageKeys Message keys
+	 * Get text of localisation messages in content language.
 	 *
 	 * @param string $contLangVariant Content language variant
 	 * @param string[] $messages Message keys
 	 * @return string[] Message values
 	 */
-	private function getMessages( array $messageKeys ) : array {
+	private function getMessages( string $contLangVariant, array $messages ) : array {
-		return array_map( function ( string $key ) {
+		return array_map( function ( string $key ) use ( $contLangVariant ) {
-			return $this->contLangMessages[$key];
+			return $this->contLangMessages[$contLangVariant][$key];
-		}, $messageKeys );
+		}, $messages );
 	}
 	/**
@ -150,6 +157,7 @@ class CommentParser {
 	 * and only dates when MediaWiki existed, let's say 2000 onwards (Thai dates before 1941 are
 	 * complicated).
 	 *
 	 * @param string $contLangVariant Content language variant
 	 * @param string $format Date format
 	 * @param string $digitsRegexp Regular expression matching a single localised digit, e.g. '[0-9]'
 	 * @param array $tzAbbrs Associative array mapping localised timezone abbreviations to
@ -157,7 +165,7 @@ class CommentParser {
 	 * @return string Regular expression
 	 */
 	private function getTimestampRegexp(
-		string $format, string $digitsRegexp, array $tzAbbrs
+		string $contLangVariant, string $format, string $digitsRegexp, array $tzAbbrs
 	) : string {
 		$formatLength = strlen( $format );
 		$s = '';
@ -178,7 +186,7 @@ class CommentParser {
 					break;
 				case 'xg':
 					$s .= self::regexpAlternateGroup(
-						$this->getMessages( Language::MONTH_GENITIVE_MESSAGES )
+						$this->getMessages( $contLangVariant, Language::MONTH_GENITIVE_MESSAGES )
 					);
 					break;
 				case 'd':
@ -186,7 +194,7 @@ class CommentParser {
 					break;
 				case 'D':
 					$s .= self::regexpAlternateGroup(
-						$this->getMessages( Language::WEEKDAY_ABBREVIATED_MESSAGES )
+						$this->getMessages( $contLangVariant, Language::WEEKDAY_ABBREVIATED_MESSAGES )
 					);
 					break;
 				case 'j':
@ -194,17 +202,17 @@ class CommentParser {
 					break;
 				case 'l':
 					$s .= self::regexpAlternateGroup(
-						$this->getMessages( Language::WEEKDAY_MESSAGES )
+						$this->getMessages( $contLangVariant, Language::WEEKDAY_MESSAGES )
 					);
 					break;
 				case 'F':
 					$s .= self::regexpAlternateGroup(
-						$this->getMessages( Language::MONTH_MESSAGES )
+						$this->getMessages( $contLangVariant, Language::MONTH_MESSAGES )
 					);
 					break;
 				case 'M':
 					$s .= self::regexpAlternateGroup(
-						$this->getMessages( Language::MONTH_ABBREVIATED_MESSAGES )
+						$this->getMessages( $contLangVariant, Language::MONTH_ABBREVIATED_MESSAGES )
 					);
 					break;
 				case 'n':
@ -269,6 +277,7 @@ class CommentParser {
 	 * Get a function that parses timestamps generated using the given date format, based on the result
 	 * of matching the regexp returned by getTimestampRegexp()
 	 *
 	 * @param string $contLangVariant Content language variant
 	 * @param string $format Date format, as used by MediaWiki
 	 * @param string[]|null $digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
 	 * @param string $localTimezone Local timezone IANA name, e.g. `America/New_York`
@ -277,7 +286,7 @@ class CommentParser {
 	 * @return callable Parser function
 	 */
 	private function getTimestampParser(
-		string $format, ?array $digits, string $localTimezone, array $tzAbbrs
+		string $contLangVariant, string $format, ?array $digits, string $localTimezone, array $tzAbbrs
 	) : callable {
 		$untransformDigits = function ( string $text ) use ( $digits ) {
 			if ( !$digits ) {
@ -342,7 +351,7 @@ class CommentParser {
 		}
 		return function ( array $match ) use (
-			$matchingGroups, $untransformDigits, $localTimezone, $tzAbbrs
+			$matchingGroups, $untransformDigits, $localTimezone, $tzAbbrs, $contLangVariant
 		) {
 			if ( is_array( $match[0] ) ) {
 				// Strip PREG_OFFSET_CAPTURE data
@ -360,7 +369,10 @@ class CommentParser {
 				$text = $match[$i + 1];
 				switch ( $code ) {
 					case 'xg':
-						$monthIdx = array_search( $text, $this->getMessages( Language::MONTH_GENITIVE_MESSAGES ) );
+						$monthIdx = array_search(
 							$text,
 							$this->getMessages( $contLangVariant, Language::MONTH_GENITIVE_MESSAGES )
 						);
 						break;
 					case 'd':
 					case 'j':
@ -371,10 +383,16 @@ class CommentParser {
 						// Day of the week - unused
 						break;
 					case 'F':
-						$monthIdx = array_search( $text, $this->getMessages( Language::MONTH_MESSAGES ) );
+						$monthIdx = array_search(
 							$text,
 							$this->getMessages( $contLangVariant, Language::MONTH_MESSAGES )
 						);
 						break;
 					case 'M':
-						$monthIdx = array_search( $text, $this->getMessages( Language::MONTH_ABBREVIATED_MESSAGES ) );
+						$monthIdx = array_search(
 							$text,
 							$this->getMessages( $contLangVariant, Language::MONTH_ABBREVIATED_MESSAGES )
 						);
 						break;
 					case 'n':
 						$monthIdx = intval( $untransformDigits( $text ) ) - 1;
@ -444,35 +462,41 @@ class CommentParser {
 	}
 	/**
-	 * Get a regular expression that matches timestamps in the local date format.
+	 * Get a regexp that matches timestamps in the local date format, for each language variant.
 	 *
 	 * This calls getTimestampRegexp() with predefined data for the current wiki.
 	 *
-	 * @return string Regular expression
+	 * @return string[] Regular expressions
 	 */
-	public function getLocalTimestampRegexp() : string {
+	public function getLocalTimestampRegexps() : array {
-		return $this->getTimestampRegexp(
+		return array_map( function ( $contLangVariant ) {
-			$this->dateFormat,
+			return $this->getTimestampRegexp(
-			$this->digits ? '[' . implode( '', $this->digits ) . ']' : '\\d',
+				$contLangVariant,
-			$this->timezones
+				$this->dateFormat[$contLangVariant],
-		);
+				'[' . implode( '', $this->digits[$contLangVariant] ) . ']',
 				$this->timezones[$contLangVariant]
 			);
 		}, $this->language->getVariants() );
 	}
 	/**
-	 * Get a function that parses timestamps in the local date format, based on the result
+	 * Get a function that parses timestamps in the local date format, for each language variant,
-	 * of matching the regexp returned by getLocalTimestampRegexp().
+	 * based on the result of matching the regexps returned by getLocalTimestampRegexps().
 	 *
 	 * This calls getTimestampParser() with predefined data for the current wiki.
 	 *
-	 * @return callable Parser function
+	 * @return callable[] Parser functions
 	 */
-	private function getLocalTimestampParser() : callable {
+	private function getLocalTimestampParsers() : array {
-		return $this->getTimestampParser(
+		return array_map( function ( $contLangVariant ) {
-			$this->dateFormat,
+			return $this->getTimestampParser(
-			$this->digits,
+				$contLangVariant,
-			$this->localTimezone,
+				$this->dateFormat[$contLangVariant],
-			$this->timezones
+				$this->digits[$contLangVariant],
-		);
+				$this->localTimezone,
 				$this->timezones[$contLangVariant]
 			);
 		}, $this->language->getVariants() );
 	}
 	/**
@ -601,10 +625,14 @@ class CommentParser {
 	 * Find a timestamp in a given text node
 	 *
 	 * @param DOMText $node Text node
-	 * @param string $timestampRegex Timestamp regex
+	 * @param string[] $timestampRegexps Timestamp regexps
-	 * @return array|null Match data
+	 * @return array|null Array with the following keys:
 	 *   - int 'offset' Length of extra text preceding the node that was used for matching
 	 *   - int 'parserIndex' Which of the regexps matched
 	 *   - array 'matchData' Regexp match data, which specifies the location of the match,
 	 *     and which can be parsed using getLocalTimestampParsers()
 	 */
-	public function findTimestamp( DOMText $node, string $timestampRegex ) : ?array {
+	public function findTimestamp( DOMText $node, array $timestampRegexps ) : ?array {
 		$nodeText = '';
 		$offset = 0;
@ -638,11 +666,16 @@ class CommentParser {
 			}
 		}
-		$matchData = null;
+		foreach ( $timestampRegexps as $i => $timestampRegexp ) {
-		// Allows us to mimic match.index in #getComments
+			$matchData = null;
-		if ( preg_match( $timestampRegex, $nodeText, $matchData, PREG_OFFSET_CAPTURE ) ) {
+			// Allows us to mimic match.index in #getComments
-			$matchData['offset'] = $offset;
+			if ( preg_match( $timestampRegexp, $nodeText, $matchData, PREG_OFFSET_CAPTURE ) ) {
-			return $matchData;
+				return [
 					'matchData' => $matchData,
 					'offset' => $offset,
 					'parserIndex' => $i,
 				];
 			}
 		}
 		return null;
 	}
@ -715,10 +748,10 @@ class CommentParser {
 	}
 	private function buildThreadItems() : void {
-		$timestampRegex = $this->getLocalTimestampRegexp();
+		$timestampRegexps = $this->getLocalTimestampRegexps();
 		$commentItems = [];
 		$threadItems = [];
-		$dfParser = $this->getLocalTimestampParser();
+		$dfParsers = $this->getLocalTimestampParsers();
 		// Placeholder heading in case there are comments in the 0th section
 		$range = new ImmutableRange( $this->rootNode, 0, $this->rootNode, 0 );
@ -739,7 +772,7 @@ class CommentParser {
 				$curComment = new HeadingItem( $range );
 				$curComment->setRootNode( $this->rootNode );
 				$threadItems[] = $curComment;
-			} elseif ( $node instanceof DOMText && ( $match = $this->findTimestamp( $node, $timestampRegex ) ) ) {
+			} elseif ( $node instanceof DOMText && ( $match = $this->findTimestamp( $node, $timestampRegexps ) ) ) {
 				$warnings = [];
 				$foundSignature = $this->findSignature( $node, $lastSigNode );
 				$author = $foundSignature[1];
@ -753,7 +786,7 @@ class CommentParser {
 				}
 				$lastSigNodeOffset = $lastSigNode === $node ?
-					$match[0][1] + strlen( $match[0][0] ) - $match['offset'] :
+					$match['matchData'][0][1] + strlen( $match['matchData'][0][0] ) - $match['offset'] :
 					CommentUtils::childIndexOf( $lastSigNode ) + 1;
 				$sigRange = new ImmutableRange(
 					$firstSigNode->parentNode,
@ -800,7 +833,7 @@ class CommentParser {
 				// Should this use the indent level of $startNode or $node?
 				$level = min( $startLevel, $endLevel );
-				$dateTime = $dfParser( $match );
+				$dateTime = $dfParsers[ $match['parserIndex'] ]( $match['matchData'] );
 				if ( isset( $dateTime->discussionToolsWarning ) ) {
 					$warnings[] = $dateTime->discussionToolsWarning;
 				}
--- a/includes/Data.php
+++ b/includes/Data.php
@ -12,6 +12,7 @@ namespace MediaWiki\Extension\DiscussionTools;
 use Config;
 use DateTimeZone;
 use ExtensionRegistry;
 use ILanguageConverter;
 use Language;
 use MediaWiki\MediaWikiServices;
 use ResourceLoaderContext;
@ -39,13 +40,30 @@ class Data {
 			$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $lang );
 		}
 		$langConv = $lang->getConverter();
 		$data = [];
-		$data['dateFormat'] = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) );
+		$data['dateFormat'] = [];
 		$dateFormat = $lang->getDateFormatString( 'both', $lang->dateFormat( false ) );
 		foreach ( $lang->getVariants() as $variant ) {
 			$convDateFormat = self::convertDateFormat( $dateFormat, $langConv, $variant );
 			$data['dateFormat'][$variant] = $convDateFormat;
 		}
-		$data['digits'] = $config->get( 'TranslateNumerals' ) ?
+		$data['digits'] = [];
-			preg_split( '//u', $lang->formatNumNoSeparators( '0123456789' ), -1, PREG_SPLIT_NO_EMPTY ) :
+		foreach ( $lang->getVariants() as $variant ) {
-			null;
+			$data['digits'][$variant] = [];
 			foreach ( str_split( '0123456789' ) as $digit ) {
 				if ( $config->get( 'TranslateNumerals' ) ) {
 					$localDigit = $lang->formatNumNoSeparators( $digit );
 				} else {
 					$localDigit = $digit;
 				}
 				$convLocalDigit = $langConv->translate( $localDigit, $variant );
 				$data['digits'][$variant][] = $convLocalDigit;
 			}
 		}
 		// ApiQuerySiteinfo
 		$data['localTimezone'] = $config->get( 'Localtimezone' );
@ -67,23 +85,30 @@ class Data {
 				return false;
 			}
 		) );
-		$data['timezones'] = array_combine(
+
-			array_map( function ( string $tzMsg ) use ( $lang ) {
+		$data['timezones'] = [];
-				// MWTimestamp::getTimezoneMessage()
+		foreach ( $lang->getVariants() as $variant ) {
-				// Parser::pstPass2()
+			$data['timezones'][$variant] = array_combine(
-				// Messages used here: 'timezone-utc' and so on
+				array_map( function ( string $tzMsg ) use ( $lang, $langConv, $variant ) {
-				$key = 'timezone-' . strtolower( trim( $tzMsg ) );
+					// MWTimestamp::getTimezoneMessage()
-				$msg = wfMessage( $key )->inLanguage( $lang );
+					// Parser::pstPass2()
-				// TODO: This probably causes a similar issue to https://phabricator.wikimedia.org/T221294,
+					// Messages used here: 'timezone-utc' and so on
-				// but we *must* check the message existence in the database, because the messages are not
+					$key = 'timezone-' . strtolower( trim( $tzMsg ) );
-				// actually defined by MediaWiki core for any timezone other than UTC...
+					$msg = wfMessage( $key )->inLanguage( $lang );
-				if ( $msg->exists() ) {
+					// TODO: This probably causes a similar issue to https://phabricator.wikimedia.org/T221294,
-					return $msg->text();
+					// but we *must* check the message existence in the database, because the messages are not
-				}
+					// actually defined by MediaWiki core for any timezone other than UTC...
-				return strtoupper( $tzMsg );
+					if ( $msg->exists() ) {
-			}, $timezoneAbbrs ),
+						$text = $msg->text();
-			array_map( 'strtoupper', $timezoneAbbrs )
+					} else {
-		);
+						$text = strtoupper( $tzMsg );
 					}
 					$convText = $langConv->translate( $text, $variant );
 					return $convText;
 				}, $timezoneAbbrs ),
 				array_map( 'strtoupper', $timezoneAbbrs )
 			);
 		}
 		// Messages in content language
 		$messagesKeys = array_merge(
@ -93,12 +118,16 @@ class Data {
 			Language::MONTH_GENITIVE_MESSAGES,
 			Language::MONTH_ABBREVIATED_MESSAGES
 		);
-		$data['contLangMessages'] = array_combine(
+		$data['contLangMessages'] = [];
-			$messagesKeys,
+		foreach ( $lang->getVariants() as $variant ) {
-			array_map( function ( $key ) use ( $lang ) {
+			$data['contLangMessages'][$variant] = array_combine(
-				return wfMessage( $key )->inLanguage( $lang )->text();
+				$messagesKeys,
-			}, $messagesKeys )
+				array_map( function ( $key ) use ( $lang, $langConv, $variant ) {
-		);
+					$text = wfMessage( $key )->inLanguage( $lang )->text();
 					return $langConv->translate( $text, $variant );
 				}, $messagesKeys )
 			);
 		}
 		// How far backwards we look for a signature associated with a timestamp before giving up.
 		// Note that this is not a hard limit on the length of signatures we detect.
@ -107,6 +136,89 @@ class Data {
 		return $data;
 	}
 	/**
 	 * Convert a date format string to a different language variant, leaving all special characters
 	 * unchanged and applying language conversion to the plain text fragments.
 	 *
 	 * @param string $format
 	 * @param ILanguageConverter $langConv
 	 * @param string $variant
 	 * @return string
 	 */
 	private static function convertDateFormat(
 		string $format,
 		ILanguageConverter $langConv,
 		string $variant
 	) : string {
 		$formatLength = strlen( $format );
 		// $format is indexed by byte below, so a literal character outside ASCII spans several
 		// offsets. This helper returns the byte at $start together with any UTF-8 continuation
 		// bytes (0b10xxxxxx) that follow it, so that the language converter always receives a
 		// whole character rather than a fragment of one.
 		$readChar = function ( int $start ) use ( $format, $formatLength ) : string {
 			$char = $format[$start];
 			while ( $start + 1 < $formatLength && ( ord( $format[$start + 1] ) & 0xC0 ) === 0x80 ) {
 				$char .= $format[++$start];
 			}
 			return $char;
 		};
 		$s = '';
 		// The supported codes must match CommentParser::getTimestampRegexp()
 		for ( $p = 0; $p < $formatLength; $p++ ) {
 			$code = $format[ $p ];
 			// Two- and three-character codes start with 'x' / 'xk'
 			if ( $code === 'x' && $p < $formatLength - 1 ) {
 				$code .= $format[++$p];
 			}
 			if ( $code === 'xk' && $p < $formatLength - 1 ) {
 				$code .= $format[++$p];
 			}
 			// LAZY SHORTCUTS that might cause bugs:
 			// * We assume that result of $langConv->translate() doesn't produce any special codes/characters
 			// * We assume that calling $langConv->translate() separately for each character is correct
 			switch ( $code ) {
 				case 'xx':
 				case 'xg':
 				case 'd':
 				case 'D':
 				case 'j':
 				case 'l':
 				case 'F':
 				case 'M':
 				case 'n':
 				case 'Y':
 				case 'xkY':
 				case 'G':
 				case 'H':
 				case 'i':
 					// Special code - pass through unchanged
 					$s .= $code;
 					break;
 				case '\\':
 					// Plain text (backslash escaping) - convert to language variant
 					if ( $p < $formatLength - 1 ) {
 						// Consume the whole escaped character, not just its first byte
 						$char = $readChar( $p + 1 );
 						$p += strlen( $char );
 						$s .= '\\' . $langConv->translate( $char, $variant );
 					} else {
 						// Trailing backslash with nothing to escape - keep it as is
 						$s .= $code;
 					}
 					break;
 				case '"':
 					// Plain text (quoted literal) - convert to language variant
 					if ( $p < $formatLength - 1 ) {
 						$endQuote = strpos( $format, '"', $p + 1 );
 						if ( $endQuote === false ) {
 							// No terminating quote, assume literal "
 							$s .= $code;
 						} else {
 							$s .= '"' .
 								$langConv->translate( substr( $format, $p + 1, $endQuote - $p - 1 ), $variant ) .
 								'"';
 							// Continue after the closing quote
 							$p = $endQuote;
 						}
 					} else {
 						// Quote at end of string, assume literal "
 						$s .= $code;
 					}
 					break;
 				default:
 					// Plain text - convert to language variant, one whole character at a time
 					$char = $readChar( $p );
 					$p += strlen( $char ) - 1;
 					$s .= $langConv->translate( $char, $variant );
 			}
 		}
 		return $s;
 	}
 	/**
 	 * Return messages in content language, for use in a ResourceLoader module.
 	 *
--- a/modules/Parser.js
+++ b/modules/Parser.js
@ -35,12 +35,13 @@ OO.initClass( Parser );
 * Get text of localisation messages in content language.
 *
 * @private
- * @param {string[]} messages
+ * @param {string} contLangVariant Content language variant
- * @return {string[]}
+ * @param {string[]} messages Message keys
 * @return {string[]} Message values
 */
-function getMessages( messages ) {
+function getMessages( contLangVariant, messages ) {
 	return messages.map( function ( code ) {
-		return data.contLangMessages[ code ];
+		return data.contLangMessages[ contLangVariant ][ code ];
 	} );
 }
@ -53,13 +54,14 @@ function getMessages( messages ) {
 * complicated).
 *
 * @private
 * @param {string} contLangVariant Content language variant
 * @param {string} format Date format, as used by MediaWiki
 * @param {string} digitsRegexp Regular expression matching a single localised digit, e.g. `[0-9]`
 * @param {Object} tzAbbrs Map of localised timezone abbreviations to IANA abbreviations
 *   for the local timezone, e.g. `{EDT: "EDT", EST: "EST"}`
 * @return {string} Regular expression
 */
-Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs ) {
+Parser.prototype.getTimestampRegexp = function ( contLangVariant, format, digitsRegexp, tzAbbrs ) {
 	var s, p, num, code, endQuote, tzRegexp, regexp;
 	function regexpGroup( r ) {
@ -87,7 +89,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
 				s += 'x';
 				break;
 			case 'xg':
-				s += regexpAlternateGroup( getMessages( [
+				s += regexpAlternateGroup( getMessages( contLangVariant, [
 					'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
 					'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
 					'december-gen'
@ -97,7 +99,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
 				num = '2';
 				break;
 			case 'D':
-				s += regexpAlternateGroup( getMessages( [
+				s += regexpAlternateGroup( getMessages( contLangVariant, [
 					'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
 				] ) );
 				break;
@ -105,20 +107,20 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
 				num = '1,2';
 				break;
 			case 'l':
-				s += regexpAlternateGroup( getMessages( [
+				s += regexpAlternateGroup( getMessages( contLangVariant, [
 					'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
 					'friday', 'saturday'
 				] ) );
 				break;
 			case 'F':
-				s += regexpAlternateGroup( getMessages( [
+				s += regexpAlternateGroup( getMessages( contLangVariant, [
 					'january', 'february', 'march', 'april', 'may_long', 'june',
 					'july', 'august', 'september', 'october', 'november',
 					'december'
 				] ) );
 				break;
 			case 'M':
-				s += regexpAlternateGroup( getMessages( [
+				s += regexpAlternateGroup( getMessages( contLangVariant, [
 					'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 					'sep', 'oct', 'nov', 'dec'
 				] ) );
@ -186,6 +188,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
 * of matching the regexp returned by #getTimestampRegexp.
 *
 * @private
 * @param {string} contLangVariant Content language variant
 * @param {string} format Date format, as used by MediaWiki
 * @param {string[]|null} digits Localised digits from 0 to 9, e.g. `[ '0', '1', ..., '9' ]`
 * @param {string} localTimezone Local timezone IANA name, e.g. `America/New_York`
@ -193,7 +196,7 @@ Parser.prototype.getTimestampRegexp = function ( format, digitsRegexp, tzAbbrs )
 *   for the local timezone, e.g. `{EDT: "EDT", EST: "EST"}`
 * @return {TimestampParser} Timestamp parser function
 */
-Parser.prototype.getTimestampParser = function ( format, digits, localTimezone, tzAbbrs ) {
+Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits, localTimezone, tzAbbrs ) {
 	var p, code, endQuote, matchingGroups = [];
 	for ( p = 0; p < format.length; p++ ) {
 		code = format[ p ];
@ -280,7 +283,7 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
 			switch ( code2 ) {
 				case 'xg':
-					monthIdx = getMessages( [
+					monthIdx = getMessages( contLangVariant, [
 						'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
 						'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
 						'december-gen'
@ -295,14 +298,14 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
 					// Day of the week - unused
 					break;
 				case 'F':
-					monthIdx = getMessages( [
+					monthIdx = getMessages( contLangVariant, [
 						'january', 'february', 'march', 'april', 'may_long', 'june',
 						'july', 'august', 'september', 'october', 'november',
 						'december'
 					] ).indexOf( text );
 					break;
 				case 'M':
-					monthIdx = getMessages( [
+					monthIdx = getMessages( contLangVariant, [
 						'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 						'sep', 'oct', 'nov', 'dec'
 					] ).indexOf( text );
@ -362,37 +365,45 @@ Parser.prototype.getTimestampParser = function ( format, digits, localTimezone,
 };
 /**
- * Get a regexp that matches timestamps in the local date format.
+ * Get a regexp that matches timestamps in the local date format, for each language variant.
 *
 * This calls #getTimestampRegexp with predefined data for the current wiki.
 *
 * @private
- * @return {string} Regular expression
+ * @return {string[]} Regular expressions
 */
-Parser.prototype.getLocalTimestampRegexp = function () {
+Parser.prototype.getLocalTimestampRegexps = function () {
-	return this.getTimestampRegexp(
+	var parser = this;
-		data.dateFormat,
+	return Object.keys( data.dateFormat ).map( function ( contLangVariant ) {
-		data.digits ? '[' + data.digits.join( '' ) + ']' : '\\d',
+		return parser.getTimestampRegexp(
-		data.timezones
+			contLangVariant,
-	);
+			data.dateFormat[ contLangVariant ],
 			'[' + data.digits[ contLangVariant ].join( '' ) + ']',
 			data.timezones[ contLangVariant ]
 		);
 	} );
 };
 /**
- * Get a function that parses timestamps in the local date format, based on the result
+ * Get a function that parses timestamps in the local date format, for each language variant,
- * of matching the regexp returned by #getLocalTimestampRegexp.
+ * based on the result of matching the regexps returned by #getLocalTimestampRegexps.
 *
 * This calls #getTimestampParser with predefined data for the current wiki.
 *
 * @private
- * @return {TimestampParser} Timestamp parser function
+ * @return {TimestampParser[]} Timestamp parser functions
 */
-Parser.prototype.getLocalTimestampParser = function () {
+Parser.prototype.getLocalTimestampParsers = function () {
-	return this.getTimestampParser(
+	var parser = this;
-		data.dateFormat,
+	return Object.keys( data.dateFormat ).map( function ( contLangVariant ) {
-		data.digits,
+		return parser.getTimestampParser(
-		data.localTimezone,
+			contLangVariant,
-		data.timezones
+			data.dateFormat[ contLangVariant ],
-	);
+			data.digits[ contLangVariant ],
 			data.localTimezone,
 			data.timezones[ contLangVariant ]
 		);
 	} );
 };
 /**
@ -411,16 +422,19 @@ function acceptOnlyNodesAllowingComments( node ) {
 }
 /**
- * Find a timestamps in a given text node
+ * Find a timestamp in a given text node
 *
 * @private
 * @param {Text} node Text node
- * @param {string} timestampRegex Timestamp regex
+ * @param {string[]} timestampRegexps Timestamp regexps
- * @return {Array} Regexp match data, which specifies the location of the match,
+ * @return {Object|null} Object with the following keys:
- *  and which can be parsed using #getLocalTimestampParser
+ *   - {number} offset Length of extra text preceding the node that was used for matching
 *   - {number} parserIndex Which of the regexps matched
 *   - {Array} matchData Regexp match data, which specifies the location of the match,
 *     and which can be parsed using #getLocalTimestampParsers
 */
-Parser.prototype.findTimestamp = function ( node, timestampRegex ) {
+Parser.prototype.findTimestamp = function ( node, timestampRegexps ) {
-	var matchData,
+	var matchData, i,
 		nodeText = '',
 		offset = 0;
 	while ( node ) {
@ -453,14 +467,19 @@ Parser.prototype.findTimestamp = function ( node, timestampRegex ) {
 		}
 	}
-	// Technically, there could be multiple matches in a single text node. However, the ultimate
+	for ( i = 0; i < timestampRegexps.length; i++ ) {
-	// point of this is to find the signatures which precede the timestamps, and any later
+		// Technically, there could be multiple matches in a single text node. However, the ultimate
-	// timestamps in the text node can't be directly preceded by a signature (as we require them to
+		// point of this is to find the signatures which precede the timestamps, and any later
-	// have links), so we only concern ourselves with the first match.
+		// timestamps in the text node can't be directly preceded by a signature (as we require them to
-	matchData = nodeText.match( timestampRegex );
+		// have links), so we only concern ourselves with the first match.
-	if ( matchData ) {
+		matchData = nodeText.match( timestampRegexps[ i ] );
-		matchData.offset = offset;
+		if ( matchData ) {
-		return matchData;
+			return {
 				matchData: matchData,
 				offset: offset,
 				parserIndex: i
 			};
 		}
 	}
 	return null;
 };
@ -714,8 +733,8 @@ Parser.prototype.findCommentById = function ( id ) {
 Parser.prototype.buildThreadItems = function () {
 	var
-		dfParser = this.getLocalTimestampParser(),
+		dfParsers = this.getLocalTimestampParsers(),
-		timestampRegex = this.getLocalTimestampRegexp(),
+		timestampRegexps = this.getLocalTimestampRegexps(),
 		commentItems = [],
 		threadItems = [],
 		treeWalker,
@ -754,7 +773,7 @@ Parser.prototype.buildThreadItems = function () {
 			curComment = new HeadingItem( range );
 			curComment.rootNode = this.rootNode;
 			threadItems.push( curComment );
-		} else if ( node.nodeType === Node.TEXT_NODE && ( match = this.findTimestamp( node, timestampRegex ) ) ) {
+		} else if ( node.nodeType === Node.TEXT_NODE && ( match = this.findTimestamp( node, timestampRegexps ) ) ) {
 			warnings = [];
 			foundSignature = this.findSignature( node, lastSigNode );
 			author = foundSignature[ 1 ];
@ -768,7 +787,7 @@ Parser.prototype.buildThreadItems = function () {
 			}
 			lastSigNodeOffset = lastSigNode === node ?
-				match.index + match[ 0 ].length - match.offset :
+				match.matchData.index + match.matchData[ 0 ].length - match.offset :
 				utils.childIndexOf( lastSigNode ) + 1;
 			sigRange = {
 				startContainer: firstSigNode.parentNode,
@ -813,7 +832,7 @@ Parser.prototype.buildThreadItems = function () {
 			// Should this use the indent level of `startNode` or `node`?
 			level = Math.min( startLevel, endLevel );
-			dateTime = dfParser( match );
+			dateTime = dfParsers[ match.parserIndex ]( match.matchData );
 			if ( dateTime.discussionToolsWarning ) {
 				warnings.push( dateTime.discussionToolsWarning );
 			}
--- a/tests/data/arwiki-data.json
+++ b/tests/data/arwiki-data.json
@ -1,74 +1,82 @@
 {
-  "dateFormat": "H:i، j xg Y",
+  "dateFormat": {
-  "digits": [
+    "ar": "H:i، j xg Y"
-    "0",
+  },
-    "1",
+  "digits": {
-    "2",
+    "ar": [
-    "3",
+      "0",
-    "4",
+      "1",
-    "5",
+      "2",
-    "6",
+      "3",
-    "7",
+      "4",
-    "8",
+      "5",
-    "9"
+      "6",
-  ],
+      "7",
      "8",
      "9"
    ]
  },
  "localTimezone": "UTC",
  "specialContributionsName": "مساهمات",
  "timezones": {
-    "ت ع م": "UTC"
+    "ar": {
      "ت ع م": "UTC"
    }
  },
  "contLangMessages": {
-    "timezone-utc": "ت ع م",
+    "ar": {
-    "sunday": "الأحد",
+      "timezone-utc": "ت ع م",
-    "monday": "الاثنين",
+      "sunday": "الأحد",
-    "tuesday": "الثلاثاء",
+      "monday": "الاثنين",
-    "wednesday": "الأربعاء",
+      "tuesday": "الثلاثاء",
-    "thursday": "الخميس",
+      "wednesday": "الأربعاء",
-    "friday": "الجمعة",
+      "thursday": "الخميس",
-    "saturday": "السبت",
+      "friday": "الجمعة",
-    "sun": "أحد",
+      "saturday": "السبت",
-    "mon": "اثنين",
+      "sun": "أحد",
-    "tue": "ثلاثاء",
+      "mon": "اثنين",
-    "wed": "أربعاء",
+      "tue": "ثلاثاء",
-    "thu": "خميس",
+      "wed": "أربعاء",
-    "fri": "جمعة",
+      "thu": "خميس",
-    "sat": "سبت",
+      "fri": "جمعة",
-    "january": "يناير",
+      "sat": "سبت",
-    "february": "فبراير",
+      "january": "يناير",
-    "march": "مارس",
+      "february": "فبراير",
-    "april": "أبريل",
+      "march": "مارس",
-    "may_long": "مايو",
+      "april": "أبريل",
-    "june": "يونيو",
+      "may_long": "مايو",
-    "july": "يوليو",
+      "june": "يونيو",
-    "august": "أغسطس",
+      "july": "يوليو",
-    "september": "سبتمبر",
+      "august": "أغسطس",
-    "october": "أكتوبر",
+      "september": "سبتمبر",
-    "november": "نوفمبر",
+      "october": "أكتوبر",
-    "december": "ديسمبر",
+      "november": "نوفمبر",
-    "january-gen": "يناير",
+      "december": "ديسمبر",
-    "february-gen": "فبراير",
+      "january-gen": "يناير",
-    "march-gen": "مارس",
+      "february-gen": "فبراير",
-    "april-gen": "أبريل",
+      "march-gen": "مارس",
-    "may-gen": "مايو",
+      "april-gen": "أبريل",
-    "june-gen": "يونيو",
+      "may-gen": "مايو",
-    "july-gen": "يوليو",
+      "june-gen": "يونيو",
-    "august-gen": "أغسطس",
+      "july-gen": "يوليو",
-    "september-gen": "سبتمبر",
+      "august-gen": "أغسطس",
-    "october-gen": "أكتوبر",
+      "september-gen": "سبتمبر",
-    "november-gen": "نوفمبر",
+      "october-gen": "أكتوبر",
-    "december-gen": "ديسمبر",
+      "november-gen": "نوفمبر",
-    "jan": "يناير",
+      "december-gen": "ديسمبر",
-    "feb": "فبراير",
+      "jan": "يناير",
-    "mar": "مارس",
+      "feb": "فبراير",
-    "apr": "أبريل",
+      "mar": "مارس",
-    "may": "مايو",
+      "apr": "أبريل",
-    "jun": "يونيو",
+      "may": "مايو",
-    "jul": "يوليو",
+      "jun": "يونيو",
-    "aug": "أغسطس",
+      "jul": "يوليو",
-    "sep": "سبتمبر",
+      "aug": "أغسطس",
-    "oct": "أكتوبر",
+      "sep": "سبتمبر",
-    "nov": "نوفمبر",
+      "oct": "أكتوبر",
-    "dec": "ديسمبر"
+      "nov": "نوفمبر",
      "dec": "ديسمبر"
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/data/ckbwiki-data.json
+++ b/tests/data/ckbwiki-data.json
@ -1,73 +1,81 @@
 {
-  "dateFormat": "‏H:i، jی xg Y",
+  "dateFormat": {
-  "digits": [
+    "ckb": "‏H:i، jی xg Y"
-    "٠",
+  },
-    "١",
+  "digits": {
-    "٢",
+    "ckb": [
-    "٣",
+      "٠",
-    "٤",
+      "١",
-    "٥",
+      "٢",
-    "٦",
+      "٣",
-    "٧",
+      "٤",
-    "٨",
+      "٥",
-    "٩"
+      "٦",
-  ],
+      "٧",
      "٨",
      "٩"
    ]
  },
  "localTimezone": "UTC",
  "specialContributionsName": "بەشدارییەکان",
  "timezones": {
-    "UTC": "UTC"
+    "ckb": {
      "UTC": "UTC"
    }
  },
  "contLangMessages": {
-    "sunday": "یەکشەممە",
+    "ckb": {
-    "monday": "دووشەممە",
+      "sunday": "یەکشەممە",
-    "tuesday": "سێشەممە",
+      "monday": "دووشەممە",
-    "wednesday": "چوارشەممە",
+      "tuesday": "سێشەممە",
-    "thursday": "پێنجشەممە",
+      "wednesday": "چوارشەممە",
-    "friday": "ھەینی",
+      "thursday": "پێنجشەممە",
-    "saturday": "شەممە",
+      "friday": "ھەینی",
-    "sun": "یەکشەممە",
+      "saturday": "شەممە",
-    "mon": "دووشەممە",
+      "sun": "یەکشەممە",
-    "tue": "سێشەممە",
+      "mon": "دووشەممە",
-    "wed": "چوارشەممە",
+      "tue": "سێشەممە",
-    "thu": "پێنجشەممە",
+      "wed": "چوارشەممە",
-    "fri": "ھەینی",
+      "thu": "پێنجشەممە",
-    "sat": "شەممە",
+      "fri": "ھەینی",
-    "january": "کانوونی دووەم",
+      "sat": "شەممە",
-    "february": "شوبات",
+      "january": "کانوونی دووەم",
-    "march": "ئازار",
+      "february": "شوبات",
-    "april": "نیسان",
+      "march": "ئازار",
-    "may_long": "ئایار",
+      "april": "نیسان",
-    "june": "حوزەیران",
+      "may_long": "ئایار",
-    "july": "تەممووز",
+      "june": "حوزەیران",
-    "august": "ئاب",
+      "july": "تەممووز",
-    "september": "ئەیلوول",
+      "august": "ئاب",
-    "october": "تشرینی یەکەم",
+      "september": "ئەیلوول",
-    "november": "تشرینی دووەم",
+      "october": "تشرینی یەکەم",
-    "december": "کانوونی یەکەم",
+      "november": "تشرینی دووەم",
-    "january-gen": "کانوونی دووەمی",
+      "december": "کانوونی یەکەم",
-    "february-gen": "شوباتی",
+      "january-gen": "کانوونی دووەمی",
-    "march-gen": "ئازاری",
+      "february-gen": "شوباتی",
-    "april-gen": "نیسانی",
+      "march-gen": "ئازاری",
-    "may-gen": "ئایاری",
+      "april-gen": "نیسانی",
-    "june-gen": "حوزەیرانی",
+      "may-gen": "ئایاری",
-    "july-gen": "تەممووزی",
+      "june-gen": "حوزەیرانی",
-    "august-gen": "ئابی",
+      "july-gen": "تەممووزی",
-    "september-gen": "ئەیلوولی",
+      "august-gen": "ئابی",
-    "october-gen": "تشرینی یەکەمی",
+      "september-gen": "ئەیلوولی",
-    "november-gen": "تشرینی دووەمی",
+      "october-gen": "تشرینی یەکەمی",
-    "december-gen": "کانوونی یەکەمی",
+      "november-gen": "تشرینی دووەمی",
-    "jan": "کانوونی دووەم",
+      "december-gen": "کانوونی یەکەمی",
-    "feb": "شوبات",
+      "jan": "کانوونی دووەم",
-    "mar": "ئازار",
+      "feb": "شوبات",
-    "apr": "نیسان",
+      "mar": "ئازار",
-    "may": "ئایار",
+      "apr": "نیسان",
-    "jun": "حوزەیران",
+      "may": "ئایار",
-    "jul": "تەممووز",
+      "jun": "حوزەیران",
-    "aug": "ئاب",
+      "jul": "تەممووز",
-    "sep": "ئەیلوول",
+      "aug": "ئاب",
-    "oct": "تشرینی یەکەم",
+      "sep": "ئەیلوول",
-    "nov": "تشرینی دووەم",
+      "oct": "تشرینی یەکەم",
-    "dec": "کانوونی یەکەم"
+      "nov": "تشرینی دووەم",
      "dec": "کانوونی یەکەم"
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/data/enwiki-data.json
+++ b/tests/data/enwiki-data.json
@ -1,73 +1,81 @@
 {
-  "dateFormat": "H:i, j F Y",
+  "dateFormat": {
-  "digits": [
+    "en": "H:i, j F Y"
-    "0",
+  },
-    "1",
+  "digits": {
-    "2",
+    "en": [
-    "3",
+      "0",
-    "4",
+      "1",
-    "5",
+      "2",
-    "6",
+      "3",
-    "7",
+      "4",
-    "8",
+      "5",
-    "9"
+      "6",
-  ],
+      "7",
      "8",
      "9"
    ]
  },
  "localTimezone": "UTC",
  "specialContributionsName": "Contributions",
  "timezones": {
-    "UTC": "UTC"
+    "en": {
      "UTC": "UTC"
    }
  },
  "contLangMessages": {
-    "sunday": "Sunday",
+    "en": {
-    "monday": "Monday",
+      "sunday": "Sunday",
-    "tuesday": "Tuesday",
+      "monday": "Monday",
-    "wednesday": "Wednesday",
+      "tuesday": "Tuesday",
-    "thursday": "Thursday",
+      "wednesday": "Wednesday",
-    "friday": "Friday",
+      "thursday": "Thursday",
-    "saturday": "Saturday",
+      "friday": "Friday",
-    "sun": "Sun",
+      "saturday": "Saturday",
-    "mon": "Mon",
+      "sun": "Sun",
-    "tue": "Tue",
+      "mon": "Mon",
-    "wed": "Wed",
+      "tue": "Tue",
-    "thu": "Thu",
+      "wed": "Wed",
-    "fri": "Fri",
+      "thu": "Thu",
-    "sat": "Sat",
+      "fri": "Fri",
-    "january": "January",
+      "sat": "Sat",
-    "february": "February",
+      "january": "January",
-    "march": "March",
+      "february": "February",
-    "april": "April",
+      "march": "March",
-    "may_long": "May",
+      "april": "April",
-    "june": "June",
+      "may_long": "May",
-    "july": "July",
+      "june": "June",
-    "august": "August",
+      "july": "July",
-    "september": "September",
+      "august": "August",
-    "october": "October",
+      "september": "September",
-    "november": "November",
+      "october": "October",
-    "december": "December",
+      "november": "November",
-    "january-gen": "January",
+      "december": "December",
-    "february-gen": "February",
+      "january-gen": "January",
-    "march-gen": "March",
+      "february-gen": "February",
-    "april-gen": "April",
+      "march-gen": "March",
-    "may-gen": "May",
+      "april-gen": "April",
-    "june-gen": "June",
+      "may-gen": "May",
-    "july-gen": "July",
+      "june-gen": "June",
-    "august-gen": "August",
+      "july-gen": "July",
-    "september-gen": "September",
+      "august-gen": "August",
-    "october-gen": "October",
+      "september-gen": "September",
-    "november-gen": "November",
+      "october-gen": "October",
-    "december-gen": "December",
+      "november-gen": "November",
-    "jan": "Jan",
+      "december-gen": "December",
-    "feb": "Feb",
+      "jan": "Jan",
-    "mar": "Mar",
+      "feb": "Feb",
-    "apr": "Apr",
+      "mar": "Mar",
-    "may": "May",
+      "apr": "Apr",
-    "jun": "Jun",
+      "may": "May",
-    "jul": "Jul",
+      "jun": "Jun",
-    "aug": "Aug",
+      "jul": "Jul",
-    "sep": "Sep",
+      "aug": "Aug",
-    "oct": "Oct",
+      "sep": "Sep",
-    "nov": "Nov",
+      "oct": "Oct",
-    "dec": "Dec"
+      "nov": "Nov",
      "dec": "Dec"
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/data/frwiki-data.json
+++ b/tests/data/frwiki-data.json
@ -1,78 +1,86 @@
 {
-  "dateFormat": "j F Y à H:i",
+  "dateFormat": {
-  "digits": [
+    "fr": "j F Y à H:i"
-    "0",
+  },
-    "1",
+  "digits": {
-    "2",
+    "fr": [
-    "3",
+      "0",
-    "4",
+      "1",
-    "5",
+      "2",
-    "6",
+      "3",
-    "7",
+      "4",
-    "8",
+      "5",
-    "9"
+      "6",
-  ],
+      "7",
      "8",
      "9"
    ]
  },
  "localTimezone": "Europe/Paris",
  "specialContributionsName": "Contributions",
  "timezones": {
-    "CEST": "CEST",
+    "fr": {
-    "CET": "CET",
+      "CEST": "CEST",
-    "PMT": "PMT",
+      "CET": "CET",
-    "WEMT": "WEMT",
+      "PMT": "PMT",
-    "WEST": "WEST",
+      "WEMT": "WEMT",
-    "WET": "WET"
+      "WEST": "WEST",
      "WET": "WET"
    }
  },
  "contLangMessages": {
-    "sunday": "dimanche",
+    "fr": {
-    "monday": "lundi",
+      "sunday": "dimanche",
-    "tuesday": "mardi",
+      "monday": "lundi",
-    "wednesday": "mercredi",
+      "tuesday": "mardi",
-    "thursday": "jeudi",
+      "wednesday": "mercredi",
-    "friday": "vendredi",
+      "thursday": "jeudi",
-    "saturday": "samedi",
+      "friday": "vendredi",
-    "sun": "dim.",
+      "saturday": "samedi",
-    "mon": "lun.",
+      "sun": "dim.",
-    "tue": "mar.",
+      "mon": "lun.",
-    "wed": "mer.",
+      "tue": "mar.",
-    "thu": "jeu.",
+      "wed": "mer.",
-    "fri": "ven.",
+      "thu": "jeu.",
-    "sat": "sam.",
+      "fri": "ven.",
-    "january": "janvier",
+      "sat": "sam.",
-    "february": "février",
+      "january": "janvier",
-    "march": "mars",
+      "february": "février",
-    "april": "avril",
+      "march": "mars",
-    "may_long": "mai",
+      "april": "avril",
-    "june": "juin",
+      "may_long": "mai",
-    "july": "juillet",
+      "june": "juin",
-    "august": "août",
+      "july": "juillet",
-    "september": "septembre",
+      "august": "août",
-    "october": "octobre",
+      "september": "septembre",
-    "november": "novembre",
+      "october": "octobre",
-    "december": "décembre",
+      "november": "novembre",
-    "january-gen": "janvier",
+      "december": "décembre",
-    "february-gen": "février",
+      "january-gen": "janvier",
-    "march-gen": "mars",
+      "february-gen": "février",
-    "april-gen": "avril",
+      "march-gen": "mars",
-    "may-gen": "mai",
+      "april-gen": "avril",
-    "june-gen": "juin",
+      "may-gen": "mai",
-    "july-gen": "juillet",
+      "june-gen": "juin",
-    "august-gen": "août",
+      "july-gen": "juillet",
-    "september-gen": "septembre",
+      "august-gen": "août",
-    "october-gen": "octobre",
+      "september-gen": "septembre",
-    "november-gen": "novembre",
+      "october-gen": "octobre",
-    "december-gen": "décembre",
+      "november-gen": "novembre",
-    "jan": "janv.",
+      "december-gen": "décembre",
-    "feb": "fév.",
+      "jan": "janv.",
-    "mar": "mars",
+      "feb": "fév.",
-    "apr": "avr.",
+      "mar": "mars",
-    "may": "mai",
+      "apr": "avr.",
-    "jun": "juin",
+      "may": "mai",
-    "jul": "juill.",
+      "jun": "juin",
-    "aug": "août",
+      "jul": "juill.",
-    "sep": "sept.",
+      "aug": "août",
-    "oct": "oct.",
+      "sep": "sept.",
-    "nov": "nov.",
+      "oct": "oct.",
-    "dec": "déc."
+      "nov": "nov.",
      "dec": "déc."
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/data/huwiki-data.json
+++ b/tests/data/huwiki-data.json
@ -1,75 +1,83 @@
 {
-  "dateFormat": "Y. F j., H:i",
+  "dateFormat": {
-  "digits": [
+    "hu": "Y. F j., H:i"
-    "0",
+  },
-    "1",
+  "digits": {
-    "2",
+    "hu": [
-    "3",
+      "0",
-    "4",
+      "1",
-    "5",
+      "2",
-    "6",
+      "3",
-    "7",
+      "4",
-    "8",
+      "5",
-    "9"
+      "6",
-  ],
+      "7",
      "8",
      "9"
    ]
  },
  "localTimezone": "Europe/Berlin",
  "specialContributionsName": "Szerkesztő_közreműködései",
  "timezones": {
-    "CEMT": "CEMT",
+    "hu": {
-    "CEST": "CEST",
+      "CEMT": "CEMT",
-    "CET": "CET"
+      "CEST": "CEST",
      "CET": "CET"
    }
  },
  "contLangMessages": {
-    "sunday": "vasárnap",
+    "hu": {
-    "monday": "hétfő",
+      "sunday": "vasárnap",
-    "tuesday": "kedd",
+      "monday": "hétfő",
-    "wednesday": "szerda",
+      "tuesday": "kedd",
-    "thursday": "csütörtök",
+      "wednesday": "szerda",
-    "friday": "péntek",
+      "thursday": "csütörtök",
-    "saturday": "szombat",
+      "friday": "péntek",
-    "sun": "vas",
+      "saturday": "szombat",
-    "mon": "hét",
+      "sun": "vas",
-    "tue": "kedd",
+      "mon": "hét",
-    "wed": "sze",
+      "tue": "kedd",
-    "thu": "csü",
+      "wed": "sze",
-    "fri": "pén",
+      "thu": "csü",
-    "sat": "szo",
+      "fri": "pén",
-    "january": "január",
+      "sat": "szo",
-    "february": "február",
+      "january": "január",
-    "march": "március",
+      "february": "február",
-    "april": "április",
+      "march": "március",
-    "may_long": "május",
+      "april": "április",
-    "june": "június",
+      "may_long": "május",
-    "july": "július",
+      "june": "június",
-    "august": "augusztus",
+      "july": "július",
-    "september": "szeptember",
+      "august": "augusztus",
-    "october": "október",
+      "september": "szeptember",
-    "november": "november",
+      "october": "október",
-    "december": "december",
+      "november": "november",
-    "january-gen": "január",
+      "december": "december",
-    "february-gen": "február",
+      "january-gen": "január",
-    "march-gen": "március",
+      "february-gen": "február",
-    "april-gen": "április",
+      "march-gen": "március",
-    "may-gen": "május",
+      "april-gen": "április",
-    "june-gen": "június",
+      "may-gen": "május",
-    "july-gen": "július",
+      "june-gen": "június",
-    "august-gen": "augusztus",
+      "july-gen": "július",
-    "september-gen": "szeptember",
+      "august-gen": "augusztus",
-    "october-gen": "október",
+      "september-gen": "szeptember",
-    "november-gen": "november",
+      "october-gen": "október",
-    "december-gen": "december",
+      "november-gen": "november",
-    "jan": "jan",
+      "december-gen": "december",
-    "feb": "febr",
+      "jan": "jan",
-    "mar": "márc",
+      "feb": "febr",
-    "apr": "ápr",
+      "mar": "márc",
-    "may": "máj",
+      "apr": "ápr",
-    "jun": "jún",
+      "may": "máj",
-    "jul": "júl",
+      "jun": "jún",
-    "aug": "aug",
+      "jul": "júl",
-    "sep": "szept",
+      "aug": "aug",
-    "oct": "okt",
+      "sep": "szept",
-    "nov": "nov",
+      "oct": "okt",
-    "dec": "dec"
+      "nov": "nov",
      "dec": "dec"
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/data/nlwiki-data.json
+++ b/tests/data/nlwiki-data.json
@ -1,75 +1,83 @@
 {
-  "dateFormat": "j M Y H:i",
+  "dateFormat": {
-  "digits": [
+    "nl": "j M Y H:i"
-    "0",
+  },
-    "1",
+  "digits": {
-    "2",
+    "nl": [
-    "3",
+      "0",
-    "4",
+      "1",
-    "5",
+      "2",
-    "6",
+      "3",
-    "7",
+      "4",
-    "8",
+      "5",
-    "9"
+      "6",
-  ],
+      "7",
      "8",
      "9"
    ]
  },
  "localTimezone": "Europe/Berlin",
  "specialContributionsName": "Bijdragen",
  "timezones": {
-    "CEMT": "CEMT",
+    "nl": {
-    "CEST": "CEST",
+      "CEMT": "CEMT",
-    "CET": "CET"
+      "CEST": "CEST",
      "CET": "CET"
    }
  },
  "contLangMessages": {
-    "sunday": "zondag",
+    "nl": {
-    "monday": "maandag",
+      "sunday": "zondag",
-    "tuesday": "dinsdag",
+      "monday": "maandag",
-    "wednesday": "woensdag",
+      "tuesday": "dinsdag",
-    "thursday": "donderdag",
+      "wednesday": "woensdag",
-    "friday": "vrijdag",
+      "thursday": "donderdag",
-    "saturday": "zaterdag",
+      "friday": "vrijdag",
-    "sun": "zo",
+      "saturday": "zaterdag",
-    "mon": "ma",
+      "sun": "zo",
-    "tue": "di",
+      "mon": "ma",
-    "wed": "wo",
+      "tue": "di",
-    "thu": "do",
+      "wed": "wo",
-    "fri": "vr",
+      "thu": "do",
-    "sat": "za",
+      "fri": "vr",
-    "january": "januari",
+      "sat": "za",
-    "february": "februari",
+      "january": "januari",
-    "march": "maart",
+      "february": "februari",
-    "april": "april",
+      "march": "maart",
-    "may_long": "mei",
+      "april": "april",
-    "june": "juni",
+      "may_long": "mei",
-    "july": "juli",
+      "june": "juni",
-    "august": "augustus",
+      "july": "juli",
-    "september": "september",
+      "august": "augustus",
-    "october": "oktober",
+      "september": "september",
-    "november": "november",
+      "october": "oktober",
-    "december": "december",
+      "november": "november",
-    "january-gen": "januari",
+      "december": "december",
-    "february-gen": "februari",
+      "january-gen": "januari",
-    "march-gen": "maart",
+      "february-gen": "februari",
-    "april-gen": "april",
+      "march-gen": "maart",
-    "may-gen": "mei",
+      "april-gen": "april",
-    "june-gen": "juni",
+      "may-gen": "mei",
-    "july-gen": "juli",
+      "june-gen": "juni",
-    "august-gen": "augustus",
+      "july-gen": "juli",
-    "september-gen": "september",
+      "august-gen": "augustus",
-    "october-gen": "oktober",
+      "september-gen": "september",
-    "november-gen": "november",
+      "october-gen": "oktober",
-    "december-gen": "december",
+      "november-gen": "november",
-    "jan": "jan",
+      "december-gen": "december",
-    "feb": "feb",
+      "jan": "jan",
-    "mar": "mrt",
+      "feb": "feb",
-    "apr": "apr",
+      "mar": "mrt",
-    "may": "mei",
+      "apr": "apr",
-    "jun": "jun",
+      "may": "mei",
-    "jul": "jul",
+      "jun": "jun",
-    "aug": "aug",
+      "jul": "jul",
-    "sep": "sep",
+      "aug": "aug",
-    "oct": "okt",
+      "sep": "sep",
-    "nov": "nov",
+      "oct": "okt",
-    "dec": "dec"
+      "nov": "nov",
      "dec": "dec"
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/data/plwiki-data.json
+++ b/tests/data/plwiki-data.json
@ -1,77 +1,85 @@
 {
-  "dateFormat": "H:i, j M Y",
+  "dateFormat": {
-  "digits": [
+    "pl": "H:i, j M Y"
-    "0",
+  },
-    "1",
+  "digits": {
-    "2",
+    "pl": [
-    "3",
+      "0",
-    "4",
+      "1",
-    "5",
+      "2",
-    "6",
+      "3",
-    "7",
+      "4",
-    "8",
+      "5",
-    "9"
+      "6",
-  ],
+      "7",
      "8",
      "9"
    ]
  },
  "localTimezone": "Europe/Warsaw",
  "specialContributionsName": "Wkład",
  "timezones": {
-    "CEST": "CEST",
+    "pl": {
-    "CET": "CET",
+      "CEST": "CEST",
-    "EEST": "EEST",
+      "CET": "CET",
-    "EET": "EET",
+      "EEST": "EEST",
-    "WMT": "WMT"
+      "EET": "EET",
      "WMT": "WMT"
    }
  },
  "contLangMessages": {
-    "sunday": "niedziela",
+    "pl": {
-    "monday": "poniedziałek",
+      "sunday": "niedziela",
-    "tuesday": "wtorek",
+      "monday": "poniedziałek",
-    "wednesday": "środa",
+      "tuesday": "wtorek",
-    "thursday": "czwartek",
+      "wednesday": "środa",
-    "friday": "piątek",
+      "thursday": "czwartek",
-    "saturday": "sobota",
+      "friday": "piątek",
-    "sun": "N",
+      "saturday": "sobota",
-    "mon": "Pn",
+      "sun": "N",
-    "tue": "Wt",
+      "mon": "Pn",
-    "wed": "Śr",
+      "tue": "Wt",
-    "thu": "Cz",
+      "wed": "Śr",
-    "fri": "Pt",
+      "thu": "Cz",
-    "sat": "So",
+      "fri": "Pt",
-    "january": "styczeń",
+      "sat": "So",
-    "february": "luty",
+      "january": "styczeń",
-    "march": "marzec",
+      "february": "luty",
-    "april": "kwiecień",
+      "march": "marzec",
-    "may_long": "maj",
+      "april": "kwiecień",
-    "june": "czerwiec",
+      "may_long": "maj",
-    "july": "lipiec",
+      "june": "czerwiec",
-    "august": "sierpień",
+      "july": "lipiec",
-    "september": "wrzesień",
+      "august": "sierpień",
-    "october": "październik",
+      "september": "wrzesień",
-    "november": "listopad",
+      "october": "październik",
-    "december": "grudzień",
+      "november": "listopad",
-    "january-gen": "stycznia",
+      "december": "grudzień",
-    "february-gen": "lutego",
+      "january-gen": "stycznia",
-    "march-gen": "marca",
+      "february-gen": "lutego",
-    "april-gen": "kwietnia",
+      "march-gen": "marca",
-    "may-gen": "maja",
+      "april-gen": "kwietnia",
-    "june-gen": "czerwca",
+      "may-gen": "maja",
-    "july-gen": "lipca",
+      "june-gen": "czerwca",
-    "august-gen": "sierpnia",
+      "july-gen": "lipca",
-    "september-gen": "września",
+      "august-gen": "sierpnia",
-    "october-gen": "października",
+      "september-gen": "września",
-    "november-gen": "listopada",
+      "october-gen": "października",
-    "december-gen": "grudnia",
+      "november-gen": "listopada",
-    "jan": "sty",
+      "december-gen": "grudnia",
-    "feb": "lut",
+      "jan": "sty",
-    "mar": "mar",
+      "feb": "lut",
-    "apr": "kwi",
+      "mar": "mar",
-    "may": "maj",
+      "apr": "kwi",
-    "jun": "cze",
+      "may": "maj",
-    "jul": "lip",
+      "jun": "cze",
-    "aug": "sie",
+      "jul": "lip",
-    "sep": "wrz",
+      "aug": "sie",
-    "oct": "paź",
+      "sep": "wrz",
-    "nov": "lis",
+      "oct": "paź",
-    "dec": "gru"
+      "nov": "lis",
      "dec": "gru"
    }
  },
  "signatureScanLimit": 100
 }
--- a/tests/phpunit/CommentParserTest.php
+++ b/tests/phpunit/CommentParserTest.php
@ -117,7 +117,7 @@ class CommentParserTest extends CommentTestCase {
 		$expected = str_replace( ':', '\:', $expected );
 		$expected = '/' . $expected . '/u';
-		$result = $parser->getTimestampRegexp( $format, '\\d', [ 'UTC' => 'UTC' ] );
+		$result = $parser->getTimestampRegexp( 'en', $format, '\\d', [ 'UTC' => 'UTC' ] );
 		self::assertSame( $expected, $result, $message );
 	}
@ -138,7 +138,7 @@ class CommentParserTest extends CommentTestCase {
 		$expected = new DateTimeImmutable( $expected );
-		$tsParser = $parser->getTimestampParser( $format, null, 'UTC', [ 'UTC' => 'UTC' ] );
+		$tsParser = $parser->getTimestampParser( 'en', $format, null, 'UTC', [ 'UTC' => 'UTC' ] );
 		self::assertEquals( $expected, $tsParser( $data ), $message );
 	}
@ -158,8 +158,8 @@ class CommentParserTest extends CommentTestCase {
 			CommentParser::newFromGlobalState( new DOMELement( 'div' ) )
 		);
-		$regexp = $parser->getTimestampRegexp( $format, '\\d', $timezoneAbbrs );
+		$regexp = $parser->getTimestampRegexp( 'en', $format, '\\d', $timezoneAbbrs );
-		$tsParser = $parser->getTimestampParser( $format, null, $timezone, $timezoneAbbrs );
+		$tsParser = $parser->getTimestampParser( 'en', $format, null, $timezone, $timezoneAbbrs );
 		$expected = new DateTimeImmutable( $expected );
 		$expectedUtc = new DateTimeImmutable( $expectedUtc );
--- a/tests/qunit/parser.test.js
+++ b/tests/qunit/parser.test.js
@ -13,7 +13,7 @@ QUnit.test( '#getTimestampRegexp', function ( assert ) {
 	cases.forEach( function ( caseItem ) {
 		assert.strictEqual(
-			parser.getTimestampRegexp( caseItem.format, '\\d', { UTC: 'UTC' } ),
+			parser.getTimestampRegexp( 'en', caseItem.format, '\\d', { UTC: 'UTC' } ),
 			caseItem.expected,
 			caseItem.message
 		);
@ -27,7 +27,7 @@ QUnit.test( '#getTimestampParser', function ( assert ) {
 	testUtils.overrideParserData( require( '../data-en.json' ) );
 	cases.forEach( function ( caseItem ) {
-		var tsParser = parser.getTimestampParser( caseItem.format, null, 'UTC', { UTC: 'UTC' } ),
+		var tsParser = parser.getTimestampParser( 'en', caseItem.format, null, 'UTC', { UTC: 'UTC' } ),
 			expectedDate = moment( caseItem.expected );
 		assert.ok(
@ -44,8 +44,8 @@ QUnit.test( '#getTimestampParser (at DST change)', function ( assert ) {
 	testUtils.overrideParserData( require( '../data-en.json' ) );
 	cases.forEach( function ( caseItem ) {
-		var regexp = parser.getTimestampRegexp( caseItem.format, '\\d', caseItem.timezoneAbbrs ),
+		var regexp = parser.getTimestampRegexp( 'en', caseItem.format, '\\d', caseItem.timezoneAbbrs ),
-			tsParser = parser.getTimestampParser( caseItem.format, null, caseItem.timezone, caseItem.timezoneAbbrs ),
+			tsParser = parser.getTimestampParser( 'en', caseItem.format, null, caseItem.timezone, caseItem.timezoneAbbrs ),
 			date = tsParser( caseItem.sample.match( regexp ) );
 		assert.ok(