From 20ff1a519f4ea5bda41bfba6497db3d7dd995482 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20Dziewo=C5=84ski?= Date: Wed, 19 Jun 2024 17:03:48 +0200 Subject: [PATCH] Fix parsing usernames with `+` urldecode() should be used for decoding URL query parameters, rawurldecode() should be used for decoding URL paths. Bug: T367977 Change-Id: I7a7b14da85fb89f612c701d2746803d830017842 --- includes/CommentParser.php | 2 +- includes/CommentUtils.php | 2 +- tests/phpunit/CommentUtilsTest.php | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/includes/CommentParser.php b/includes/CommentParser.php index 4d8840b52..30afdaa67 100644 --- a/includes/CommentParser.php +++ b/includes/CommentParser.php @@ -1234,7 +1234,7 @@ class CommentParser { ) { $parsoidHref = $part['template']['target']['href']; Assert::precondition( substr( $parsoidHref, 0, 2 ) === './', "href has valid format" ); - $out[] = urldecode( substr( $parsoidHref, 2 ) ); + $out[] = rawurldecode( substr( $parsoidHref, 2 ) ); } else { $out[] = null; } diff --git a/includes/CommentUtils.php b/includes/CommentUtils.php index b5741b355..46c841af5 100644 --- a/includes/CommentUtils.php +++ b/includes/CommentUtils.php @@ -499,7 +499,7 @@ class CommentUtils { ) . '/'; $matches = null; if ( preg_match( $articlePathRegexp, $path, $matches ) ) { - return urldecode( $matches[1] ); + return rawurldecode( $matches[1] ); } return null; } diff --git a/tests/phpunit/CommentUtilsTest.php b/tests/phpunit/CommentUtilsTest.php index 16f2ea634..0d1518c47 100644 --- a/tests/phpunit/CommentUtilsTest.php +++ b/tests/phpunit/CommentUtilsTest.php @@ -36,6 +36,7 @@ class CommentUtilsTest extends IntegrationTestCase { /** * @covers \MediaWiki\Extension\DiscussionTools\CommentUtils::getTitleFromUrl * @dataProvider provideGetTitleFromUrl_ShortUrl + * @dataProvider provideGetTitleFromUrl_Decoding * @dataProvider provideGetTitleFromUrl_ConfusingShortUrl * @dataProvider provideGetTitleFromUrl_NoShortUrl */ @@ -46,6 +47,23 @@ class CommentUtilsTest extends IntegrationTestCase { ); } + public static function provideGetTitleFromUrl_Decoding() { + // Standard short URL configuration like on Wikimedia wikis + $config = new HashConfig( [ 'ArticlePath' => '/wiki/$1' ] ); + + // In URL paths, non-percent-encoded `+` represents itself + yield [ 'A+B', '/wiki/A+B', $config ]; + yield [ 'A B', '/wiki/A B', $config ]; + yield [ 'A+B', '/wiki/A%2BB', $config ]; + yield [ 'A B', '/wiki/A%20B', $config ]; + + // In URL query parameters, non-percent-encoded `+` represents ` ` + yield [ 'A B', '/w/index.php?title=A+B', $config ]; + yield [ 'A B', '/w/index.php?title=A B', $config ]; + yield [ 'A+B', '/w/index.php?title=A%2BB', $config ]; + yield [ 'A B', '/w/index.php?title=A%20B', $config ]; + } + public static function provideGetTitleFromUrl_ShortUrl() { // Standard short URL configuration like on Wikimedia wikis $config = new HashConfig( [ 'ArticlePath' => '/wiki/$1' ] );