Merge "Support ISO 8601 timestamps in the parser"

2024-11-24 00:13:36 +00:00 · 2023-06-19 12:31:58 +00:00 · 2023-06-19 12:31:58 +00:00 · 0c7c7de242
parent a6a267f368 4ca17b8c33
commit 0c7c7de242
4 changed files with 54 additions and 2 deletions
--- a/includes/CommentParser.php
+++ b/includes/CommentParser.php
@@ -184,6 +184,7 @@ class CommentParser {
 	): string {
 		$formatLength = strlen( $format );
 		$s = '';
+		$raw = false;
 		// Adapted from Language::sprintfDate()
 		for ( $p = 0; $p < $formatLength; $p++ ) {
 			$num = false;
@@ -204,6 +205,9 @@ class CommentParser {
 						$this->getMessages( $contLangVariant, Language::MONTH_GENITIVE_MESSAGES )
 					);
 					break;
+				case 'xn':
+					$raw = true;
+					break;
 				case 'd':
 					$num = '2';
 					break;
@@ -230,6 +234,9 @@ class CommentParser {
 						$this->getMessages( $contLangVariant, Language::MONTH_ABBREVIATED_MESSAGES )
 					);
 					break;
+				case 'm':
+					$num = '2';
+					break;
 				case 'n':
 					$num = '1,2';
 					break;
@@ -248,6 +255,9 @@ class CommentParser {
 				case 'i':
 					$num = '2';
 					break;
+				case 's':
+					$num = '2';
+					break;
 				case '\\':
 					// Backslash escaping
 					if ( $p < $formatLength - 1 ) {
@@ -279,7 +289,12 @@ class CommentParser {
 					$p += strlen( $char ) - 1;
 			}
 			if ( $num !== false ) {
-				$s .= '(' . $digitsRegexp . '{' . $num . '})';
+				if ( $raw ) {
+					$s .= '([0-9]{' . $num . '})';
+					$raw = false;
+				} else {
+					$s .= '(' . $digitsRegexp . '{' . $num . '})';
+				}
 			}
 			// Ignore some invisible Unicode characters that often sneak into copy-pasted timestamps (T308448)
 			$s .= '[\\x{200E}\\x{200F}]?';
@@ -334,6 +349,7 @@ class CommentParser {

 			switch ( $code ) {
 				case 'xx':
+				case 'xn':
 					break;
 				case 'xg':
 				case 'd':
@@ -342,12 +358,14 @@ class CommentParser {
 				case 'l':
 				case 'F':
 				case 'M':
+				case 'm':
 				case 'n':
 				case 'Y':
 				case 'xkY':
 				case 'G':
 				case 'H':
 				case 'i':
+				case 's':
 					$matchingGroups[] = $code;
 					break;
 				case '\\':
@@ -414,6 +432,7 @@ class CommentParser {
 							$this->getMessages( $contLangVariant, Language::MONTH_ABBREVIATED_MESSAGES )
 						);
 						break;
+					case 'm':
 					case 'n':
 						$monthIdx = intval( $untransformDigits( $text ) ) - 1;
 						break;
@@ -431,6 +450,9 @@ class CommentParser {
 					case 'i':
 						$minute = intval( $untransformDigits( $text ) );
 						break;
+					case 's':
+						// Seconds - unused, because most timestamp formats omit them
+						break;
 					default:
 						throw new LogicException( 'Not implemented' );
 				}
--- a/includes/LanguageData.php
+++ b/includes/LanguageData.php
@@ -179,18 +179,21 @@ class LanguageData {
 			switch ( $code ) {
 				case 'xx':
 				case 'xg':
+				case 'xn':
 				case 'd':
 				case 'D':
 				case 'j':
 				case 'l':
 				case 'F':
 				case 'M':
+				case 'm':
 				case 'n':
 				case 'Y':
 				case 'xkY':
 				case 'G':
 				case 'H':
 				case 'i':
+				case 's':
 					// Special code - pass through unchanged
 					$s .= $code;
 					break;
--- a/modules/Parser.js
+++ b/modules/Parser.js
@@ -96,6 +96,7 @@ Parser.prototype.getTimestampRegexp = function ( contLangVariant, format, digits
 	var parser = this;

 	var s = '';
+	var raw = false;
 	// Adapted from Language::sprintfDate()
 	for ( var p = 0; p < format.length; p++ ) {
 		var num = false;
@@ -118,6 +119,9 @@ Parser.prototype.getTimestampRegexp = function ( contLangVariant, format, digits
 					'december-gen'
 				] ) );
 				break;
+			case 'xn':
+				raw = true;
+				break;
 			case 'd':
 				num = '2';
 				break;
@@ -148,6 +152,9 @@ Parser.prototype.getTimestampRegexp = function ( contLangVariant, format, digits
 					'sep', 'oct', 'nov', 'dec'
 				] ) );
 				break;
+			case 'm':
+				num = '2';
+				break;
 			case 'n':
 				num = '1,2';
 				break;
@@ -166,6 +173,9 @@ Parser.prototype.getTimestampRegexp = function ( contLangVariant, format, digits
 			case 'i':
 				num = '2';
 				break;
+			case 's':
+				num = '2';
+				break;
 			case '\\':
 				// Backslash escaping
 				if ( p < format.length - 1 ) {
@@ -197,7 +207,12 @@ Parser.prototype.getTimestampRegexp = function ( contLangVariant, format, digits
 				p += char.length - 1;
 		}
 		if ( num !== false ) {
-			s += regexpGroup( digitsRegexp + '{' + num + '}' );
+			if ( raw ) {
+				s += regexpGroup( '[0-9]{' + num + '}' );
+				raw = false;
+			} else {
+				s += regexpGroup( digitsRegexp + '{' + num + '}' );
+			}
 		}
 		// Ignore some invisible Unicode characters that often sneak into copy-pasted timestamps (T308448)
 		s += '[\\u200E\\u200F]?';
@@ -237,6 +252,7 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits

 		switch ( code ) {
 			case 'xx':
+			case 'xn':
 				break;
 			case 'xg':
 			case 'd':
@@ -245,12 +261,14 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits
 			case 'l':
 			case 'F':
 			case 'M':
+			case 'm':
 			case 'n':
 			case 'Y':
 			case 'xkY':
 			case 'G':
 			case 'H':
 			case 'i':
+			case 's':
 				matchingGroups.push( code );
 				break;
 			case '\\':
@@ -340,6 +358,7 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits
 						'sep', 'oct', 'nov', 'dec'
 					] ).indexOf( text );
 					break;
+				case 'm':
 				case 'n':
 					monthIdx = Number( untransformDigits( text ) ) - 1;
 					break;
@@ -357,6 +376,9 @@ Parser.prototype.getTimestampParser = function ( contLangVariant, format, digits
 				case 'i':
 					minute = Number( untransformDigits( text ) );
 					break;
+				case 's':
+					// Seconds - unused, because most timestamp formats omit them
+					break;
 				default:
 					throw new Error( 'Not implemented' );
 			}
--- a/tests/cases/timestamp-regex.json
+++ b/tests/cases/timestamp-regex.json
@@ -43,5 +43,10 @@
 		"format": "H:i💩 j F Y",
 		"expected": "(\\d{2})[\\u200E\\u200F]?:[\\u200E\\u200F]?(\\d{2})[\\u200E\\u200F]?💩[\\u200E\\u200F]? [\\u200E\\u200F]?(\\d{1,2})[\\u200E\\u200F]? [\\u200E\\u200F]?(January|February|March|April|May|June|July|August|September|October|November|December)[\\u200E\\u200F]? [\\u200E\\u200F]?(\\d{4})[\\u200E\\u200F]? [\\u200E\\u200F]?\\((UTC)\\)",
 		"message": "(en) UTF-16 multibyte characters survive"
+	},
+	{
+		"format": "xnY-xnm-xnd\"T\"xnH:xni:xns",
+		"expected": "[\\u200E\\u200F]?([0-9]{4})[\\u200E\\u200F]?\\-[\\u200E\\u200F]?[\\u200E\\u200F]?([0-9]{2})[\\u200E\\u200F]?\\-[\\u200E\\u200F]?[\\u200E\\u200F]?([0-9]{2})[\\u200E\\u200F]?T[\\u200E\\u200F]?[\\u200E\\u200F]?([0-9]{2})[\\u200E\\u200F]?:[\\u200E\\u200F]?[\\u200E\\u200F]?([0-9]{2})[\\u200E\\u200F]?:[\\u200E\\u200F]?[\\u200E\\u200F]?([0-9]{2})[\\u200E\\u200F]? [\\u200E\\u200F]?\\((UTC)\\)",
+		"message": "ISO 8601"
 	}
 ]