Fix DiscussionParser failing in certain languages

It appears that the initial \h in this non-Unicode regular expression matches part of a UTF-8 character, destroying it. This makes the final preg_match() in this method fail when $output is used as a pattern. Bug: T264922 Change-Id: Iaf240bc2e0808c2f57c1f8bab2589d3207915afe
2024-11-27 17:20:40 +00:00 · 2020-10-27 19:59:26 +01:00 · 2020-10-27 19:59:26 +01:00 · 8880df4123
parent b8d29d62ef
commit 8880df4123
2 changed files with 6 additions and 1 deletion
--- a/includes/DiscussionParser.php
+++ b/includes/DiscussionParser.php
@@ -1155,7 +1155,7 @@ abstract class EchoDiscussionParser {
 		// Step 2: Generalise it
 		// Trim off the timezone to replace at the end
 		$output = $exemplarTimestamp;
-		$tzRegex = '/\h*\(\w+\)\h*$/';
+		$tzRegex = '/\h*\(\w+\)\h*$/u';
 		$tzMatches = [];
 		if ( preg_match( $tzRegex, $output, $tzMatches, PREG_OFFSET_CAPTURE ) ) {
 			$output = substr( $output, 0, $tzMatches[0][1] );
--- a/tests/phpunit/DiscussionParserTest.php
+++ b/tests/phpunit/DiscussionParserTest.php
@@ -1013,6 +1013,11 @@ TEXT
 		$this->assertSame( 1, $match );
 	}

+	public function testTimestampRegex_T264922() {
+		$this->setMwGlobals( 'wgLanguageCode', 'skr' );
+		$this->assertIsString( EchoDiscussionParser::getTimestampRegex(), 'does not fail' );
+	}
+
 	public function testGetTimestampPosition() {
 		$line = 'Hello World. ' . self::getExemplarTimestamp();
 		$pos = EchoDiscussionParser::getTimestampPosition( $line );