diff --git a/includes/CommentUtils.php b/includes/CommentUtils.php index e32a130da..647cb57bd 100644 --- a/includes/CommentUtils.php +++ b/includes/CommentUtils.php @@ -6,6 +6,7 @@ use Config; use LogicException; use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentCommentItem; use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentThreadItem; +use MediaWiki\MainConfigNames; use Wikimedia\Assert\Assert; use Wikimedia\Parsoid\DOM\Comment; use Wikimedia\Parsoid\DOM\Element; @@ -483,11 +484,16 @@ class CommentUtils { /** * Get a MediaWiki page title from a URL * - * @param string $url - * @param Config $config + * @param string $url Relative URL (from a `href` attribute) + * @param Config $config Config settings needed to resolve the relative URL * @return string|null */ public static function getTitleFromUrl( string $url, Config $config ): ?string { + // Protocol-relative URLs are handled really badly by parse_url() + if ( str_starts_with( $url, '//' ) ) { + $url = "http:$url"; + } + $bits = parse_url( $url ); $query = wfCgiToArray( $bits['query'] ?? '' ); if ( isset( $query['title'] ) ) { @@ -495,19 +501,26 @@ class CommentUtils { } // TODO: Set the correct base in the document? - if ( strpos( $url, './' ) === 0 ) { - $url = 'https://local' . str_replace( '$1', substr( $url, 2 ), $config->get( 'ArticlePath' ) ); - } elseif ( strpos( $url, '://' ) === false ) { - $url = 'https://local' . $url; + $articlePath = $config->get( MainConfigNames::ArticlePath ); + if ( str_starts_with( $url, './' ) ) { + // Assume this is URL in the format used by Parsoid documents + $url = substr( $url, 2 ); + $path = str_replace( '$1', $url, $articlePath ); + } elseif ( !str_contains( $url, '://' ) ) { + // Assume this is URL in the format used by legacy parser documents + $path = $url; + } else { + // External link + $path = $bits['path'] ?? ''; } - $articlePathRegexp = '/' . str_replace( - preg_quote( '$1', '/' ), - '(.*)', - preg_quote( $config->get( 'ArticlePath' ), '/' ) + $articlePathRegexp = '/^' . str_replace( + '\\$1', + '([^?]*)', + preg_quote( $articlePath, '/' ) ) . '/'; $matches = null; - if ( preg_match( $articlePathRegexp, $url, $matches ) ) { + if ( preg_match( $articlePathRegexp, $path, $matches ) ) { return urldecode( $matches[1] ); } return null; diff --git a/tests/phpunit/CommentUtilsTest.php b/tests/phpunit/CommentUtilsTest.php index a4d54a6f9..e4a9ecb29 100644 --- a/tests/phpunit/CommentUtilsTest.php +++ b/tests/phpunit/CommentUtilsTest.php @@ -2,6 +2,7 @@ namespace MediaWiki\Extension\DiscussionTools\Tests; +use HashConfig; use MediaWiki\Extension\DiscussionTools\CommentUtils; use MediaWiki\MediaWikiServices; @@ -35,4 +36,103 @@ class CommentUtilsTest extends IntegrationTestCase { public function provideIsSingleCommentSignedBy(): array { return static::getJson( '../cases/isSingleCommentSignedBy.json' ); } + + /** + * @covers \MediaWiki\Extension\DiscussionTools\CommentUtils::getTitleFromUrl + * @dataProvider provideGetTitleFromUrl_ShortUrl + * @dataProvider provideGetTitleFromUrl_ConfusingShortUrl + * @dataProvider provideGetTitleFromUrl_NoShortUrl + */ + public function testGetTitleFromUrl( $expected, $input, $config ) { + static::assertEquals( + $expected, + CommentUtils::getTitleFromUrl( $input, $config ) + ); + } + + public static function provideGetTitleFromUrl_ShortUrl() { + // Standard short URL configuration like on Wikimedia wikis + $config = new HashConfig( [ 'ArticlePath' => '/wiki/$1' ] ); + + // These should never occur in documents generated by either wikitext parser + yield 'ShortUrl-null-string' => [ null, 'Foo', $config ]; + yield 'ShortUrl-null-path' => [ null, 'path/Foo', $config ]; + yield 'ShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ]; + + // Legacy wikitext parser + yield 'ShortUrl-simple-path' => [ 'Foo', '/wiki/Foo', $config ]; + yield 'ShortUrl-simple-cgi' => [ 'Foo', '/w/index.php?title=Foo', $config ]; + yield 'ShortUrl-viewing-path' => [ 'Foo', '/wiki/Foo?action=view', $config ]; + yield 'ShortUrl-viewing-cgi' => [ 'Foo', '/w/index.php?title=Foo&action=view', $config ]; + yield 'ShortUrl-editing-path' => [ 'Foo', '/wiki/Foo?action=edit', $config ]; + yield 'ShortUrl-editing-cgi' => [ 'Foo', '/w/index.php?title=Foo&action=edit', $config ]; + yield 'ShortUrl-repeated question-mark' => [ 'Foo', '/wiki/Foo?Gosh?This?Path?Is?Bad', $config ]; + + // Parsoid parser + yield 'ShortUrl-parsoid-simple-path' => [ 'Foo', './Foo', $config ]; + yield 'ShortUrl-parsoid-viewing-path' => [ 'Foo', './Foo?action=view', $config ]; + yield 'ShortUrl-parsoid-editing-path' => [ 'Foo', './Foo?action=edit', $config ]; + + // External link (matches regardless of domain - this may be unexpected) + yield 'ShortUrl-external-path1' => [ 'Foo', 'http://example.com/wiki/Foo', $config ]; + yield 'ShortUrl-external-path2' => [ 'Foo', 'http://example.org/wiki/Foo', $config ]; + yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/w/index.php?title=Foo', $config ]; + yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/w/index.php?title=Foo', $config ]; + yield 'ShortUrl-external-null' => [ null, 'http://example.net/Foo', $config ]; + } + + public static function provideGetTitleFromUrl_ConfusingShortUrl() { + // Super short URL that is confusing for the software but people use it anyway + $config = new HashConfig( [ 'ArticlePath' => '/$1' ] ); + + // These should never occur in documents generated by either wikitext parser + yield 'ConfusingShortUrl-null-string' => [ null, 'Foo', $config ]; + yield 'ConfusingShortUrl-null-path' => [ null, 'path/Foo', $config ]; + yield 'ConfusingShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ]; + + // Legacy wikitext parser + yield 'ConfusingShortUrl-simple-path' => [ 'Foo', '/Foo', $config ]; + yield 'ConfusingShortUrl-simple-cgi' => [ 'Foo', '/index.php?title=Foo', $config ]; + yield 'ConfusingShortUrl-viewing-path' => [ 'Foo', '/Foo?action=view', $config ]; + yield 'ConfusingShortUrl-viewing-cgi' => [ 'Foo', '/index.php?title=Foo&action=view', $config ]; + yield 'ConfusingShortUrl-editing-path' => [ 'Foo', '/Foo?action=edit', $config ]; + yield 'ConfusingShortUrl-editing-cgi' => [ 'Foo', '/index.php?title=Foo&action=edit', $config ]; + yield 'ConfusingShortUrl-repeated question-mark' => [ 'Foo', '/Foo?Gosh?This?Path?Is?Bad', $config ]; + + // Parsoid parser + yield 'ConfusingShortUrl-parsoid-simple-path' => [ 'Foo', './Foo', $config ]; + yield 'ConfusingShortUrl-parsoid-viewing-path' => [ 'Foo', './Foo?action=view', $config ]; + yield 'ConfusingShortUrl-parsoid-editing-path' => [ 'Foo', './Foo?action=edit', $config ]; + + // External link (matches regardless of domain - this may be unexpected) + yield 'ShortUrl-external-path1' => [ 'Foo', 'http://example.com/Foo', $config ]; + yield 'ShortUrl-external-path2' => [ 'Foo', 'http://example.org/Foo', $config ]; + yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/index.php?title=Foo', $config ]; + yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/index.php?title=Foo', $config ]; + } + + public static function provideGetTitleFromUrl_NoShortUrl() { + // No short URL configuration + $config = new HashConfig( [ 'ArticlePath' => '/wiki/index.php?title=$1' ] ); + + // These should never occur in documents generated by either wikitext parser + yield 'NoShortUrl-null-string' => [ null, 'Foo', $config ]; + yield 'NoShortUrl-null-path' => [ null, 'path/Foo', $config ]; + yield 'NoShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ]; + + // Legacy wikitext parser + yield 'NoShortUrl-simple-path' => [ 'Foo', '/wiki/index.php?title=Foo', $config ]; + yield 'NoShortUrl-viewing-path' => [ 'Foo', '/wiki/index.php?title=Foo&action=view', $config ]; + yield 'NoShortUrl-editing-path' => [ 'Foo', '/wiki/index.php?title=Foo&action=edit', $config ]; + + // Parsoid parser + yield 'NoShortUrl-parsoid-simple-path' => [ 'Foo', './index.php?title=Foo', $config ]; + yield 'NoShortUrl-parsoid-viewing-path' => [ 'Foo', './index.php?title=Foo&action=view', $config ]; + yield 'NoShortUrl-parsoid-editing-path' => [ 'Foo', './index.php?title=Foo&action=edit', $config ]; + + // External link (matches regardless of domain - this may be unexpected) + yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/wiki/index.php?title=Foo', $config ]; + yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/wiki/index.php?title=Foo', $config ]; + yield 'ShortUrl-external-null' => [ null, 'http://example.net/Foo', $config ]; + } }