Fix PHP getTitleFromUrl() when $wgArticlePath is '/$1'

In this case, the generated regexp would match the '/local' part in
the generated URL. Prefixing 'https://local' is no longer necessary
since 10899af666.

Add tests for this, and some tests to cover T261711 as well.

Bug: T358321
Change-Id: Idf54deba13f30b799b7b8d17de1897bc90f95701
This commit is contained in:
Bartosz Dziewoński 2024-02-24 00:49:36 +01:00
parent cf9f11167d
commit 6419c19d1e
2 changed files with 102 additions and 22 deletions

View file

@ -483,11 +483,16 @@ class CommentUtils {
/**
* Get a MediaWiki page title from a URL
*
* @param string $url
* @param Config $config
* @param string $url Relative URL (from a `href` attribute)
* @param Config $config Config settings needed to resolve the relative URL
* @return string|null
*/
public static function getTitleFromUrl( string $url, Config $config ): ?string {
// Protocol-relative URLs are handled really badly by parse_url()
if ( str_starts_with( $url, '//' ) ) {
$url = "http:$url";
}
$bits = parse_url( $url );
$query = wfCgiToArray( $bits['query'] ?? '' );
if ( isset( $query['title'] ) ) {
@ -497,19 +502,24 @@ class CommentUtils {
// TODO: Set the correct base in the document?
$articlePath = $config->get( MainConfigNames::ArticlePath );
if ( str_starts_with( $url, './' ) ) {
// Assume this is a URL in the format used by Parsoid documents
$url = substr( $url, 2 );
$url = 'https://local' . str_replace( '$1', $url, $articlePath );
$path = str_replace( '$1', $url, $articlePath );
} elseif ( !str_contains( $url, '://' ) ) {
$url = 'https://local' . $url;
// Assume this is a URL in the format used by legacy parser documents
$path = $url;
} else {
// External link
$path = $bits['path'] ?? '';
}
$articlePathRegexp = '/' . str_replace(
$articlePathRegexp = '/^' . str_replace(
'\\$1',
'([^?]*)',
preg_quote( $articlePath, '/' )
) . '/';
$matches = null;
if ( preg_match( $articlePathRegexp, $url, $matches ) ) {
if ( preg_match( $articlePathRegexp, $path, $matches ) ) {
return urldecode( $matches[1] );
}
return null;

View file

@ -2,7 +2,7 @@
namespace MediaWiki\Extension\DiscussionTools\Tests;
use MediaWiki\Config\GlobalVarConfig;
use MediaWiki\Config\HashConfig;
use MediaWiki\Extension\DiscussionTools\CommentUtils;
/**
@ -35,7 +35,9 @@ class CommentUtilsTest extends IntegrationTestCase {
/**
* @covers \MediaWiki\Extension\DiscussionTools\CommentUtils::getTitleFromUrl
* @dataProvider provideGetTitleFromUrl
* @dataProvider provideGetTitleFromUrl_ShortUrl
* @dataProvider provideGetTitleFromUrl_ConfusingShortUrl
* @dataProvider provideGetTitleFromUrl_NoShortUrl
*/
public function testGetTitleFromUrl( $expected, $input, $config ) {
static::assertEquals(
@ -44,21 +46,89 @@ class CommentUtilsTest extends IntegrationTestCase {
);
}
public static function provideGetTitleFromUrl() {
// TODO: Test with different configs.
$config = new GlobalVarConfig();
$GLOBALS['wgArticlePath'] = '/wiki/$1';
public static function provideGetTitleFromUrl_ShortUrl() {
// Standard short URL configuration like on Wikimedia wikis
$config = new HashConfig( [ 'ArticlePath' => '/wiki/$1' ] );
yield 'null-string' => [ null, 'Foo', $config ];
yield 'null-path' => [ null, 'path/Foo', $config ];
yield 'null-wiki-path' => [ null, 'wiki/Foo', $config ];
yield 'simple-path' => [ 'Foo', 'site/wiki/Foo', $config ];
yield 'simple-cgi' => [ 'Foo', 'site/w/index.php?title=Foo', $config ];
yield 'viewing-path' => [ 'Foo', 'site/wiki/Foo?action=view', $config ];
yield 'viewing-cgi' => [ 'Foo', 'site/w/index.php?title=Foo&action=view', $config ];
yield 'editing-path' => [ 'Foo', 'site/wiki/Foo?action=edit', $config ];
yield 'editing-cgi' => [ 'Foo', 'site/w/index.php?title=Foo&action=edit', $config ];
// These should never occur in documents generated by either wikitext parser
yield 'ShortUrl-null-string' => [ null, 'Foo', $config ];
yield 'ShortUrl-null-path' => [ null, 'path/Foo', $config ];
yield 'ShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ];
yield 'repeated question-mark' => [ 'Foo', 'site/wiki/Foo?Gosh?This?Path?Is?Bad', $config ];
// Legacy wikitext parser
yield 'ShortUrl-simple-path' => [ 'Foo', '/wiki/Foo', $config ];
yield 'ShortUrl-simple-cgi' => [ 'Foo', '/w/index.php?title=Foo', $config ];
yield 'ShortUrl-viewing-path' => [ 'Foo', '/wiki/Foo?action=view', $config ];
yield 'ShortUrl-viewing-cgi' => [ 'Foo', '/w/index.php?title=Foo&action=view', $config ];
yield 'ShortUrl-editing-path' => [ 'Foo', '/wiki/Foo?action=edit', $config ];
yield 'ShortUrl-editing-cgi' => [ 'Foo', '/w/index.php?title=Foo&action=edit', $config ];
yield 'ShortUrl-repeated question-mark' => [ 'Foo', '/wiki/Foo?Gosh?This?Path?Is?Bad', $config ];
// Parsoid parser
yield 'ShortUrl-parsoid-simple-path' => [ 'Foo', './Foo', $config ];
yield 'ShortUrl-parsoid-viewing-path' => [ 'Foo', './Foo?action=view', $config ];
yield 'ShortUrl-parsoid-editing-path' => [ 'Foo', './Foo?action=edit', $config ];
// External link (matches regardless of domain - this may be unexpected)
yield 'ShortUrl-external-path1' => [ 'Foo', 'http://example.com/wiki/Foo', $config ];
yield 'ShortUrl-external-path2' => [ 'Foo', 'http://example.org/wiki/Foo', $config ];
yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/w/index.php?title=Foo', $config ];
yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/w/index.php?title=Foo', $config ];
yield 'ShortUrl-external-null' => [ null, 'http://example.net/Foo', $config ];
}
/**
 * Data provider for testGetTitleFromUrl(), using an ArticlePath of '/$1'.
 *
 * Each dataset is [ expected title or null, input URL, config ].
 *
 * Every dataset name carries the 'ConfusingShortUrl-' prefix so that names stay
 * unique when this provider is combined with the other providers attached to
 * the same test method.
 */
public static function provideGetTitleFromUrl_ConfusingShortUrl() {
	// Super short URL that is confusing for the software but people use it anyway
	$config = new HashConfig( [ 'ArticlePath' => '/$1' ] );

	// These should never occur in documents generated by either wikitext parser
	yield 'ConfusingShortUrl-null-string' => [ null, 'Foo', $config ];
	yield 'ConfusingShortUrl-null-path' => [ null, 'path/Foo', $config ];
	yield 'ConfusingShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ];

	// Legacy wikitext parser
	yield 'ConfusingShortUrl-simple-path' => [ 'Foo', '/Foo', $config ];
	yield 'ConfusingShortUrl-simple-cgi' => [ 'Foo', '/index.php?title=Foo', $config ];
	yield 'ConfusingShortUrl-viewing-path' => [ 'Foo', '/Foo?action=view', $config ];
	yield 'ConfusingShortUrl-viewing-cgi' => [ 'Foo', '/index.php?title=Foo&action=view', $config ];
	yield 'ConfusingShortUrl-editing-path' => [ 'Foo', '/Foo?action=edit', $config ];
	yield 'ConfusingShortUrl-editing-cgi' => [ 'Foo', '/index.php?title=Foo&action=edit', $config ];
	yield 'ConfusingShortUrl-repeated question-mark' => [ 'Foo', '/Foo?Gosh?This?Path?Is?Bad', $config ];

	// Parsoid parser
	yield 'ConfusingShortUrl-parsoid-simple-path' => [ 'Foo', './Foo', $config ];
	yield 'ConfusingShortUrl-parsoid-viewing-path' => [ 'Foo', './Foo?action=view', $config ];
	yield 'ConfusingShortUrl-parsoid-editing-path' => [ 'Foo', './Foo?action=edit', $config ];

	// External link (matches regardless of domain - this may be unexpected)
	// Names previously reused the 'ShortUrl-' prefix, colliding with the ShortUrl provider.
	yield 'ConfusingShortUrl-external-path1' => [ 'Foo', 'http://example.com/Foo', $config ];
	yield 'ConfusingShortUrl-external-path2' => [ 'Foo', 'http://example.org/Foo', $config ];
	yield 'ConfusingShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/index.php?title=Foo', $config ];
	yield 'ConfusingShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/index.php?title=Foo', $config ];
}
/**
 * Data provider for testGetTitleFromUrl(), using an ArticlePath of
 * '/wiki/index.php?title=$1' (no short URLs).
 *
 * Each dataset is [ expected title or null, input URL, config ].
 *
 * Every dataset name carries the 'NoShortUrl-' prefix so that names stay unique
 * when this provider is combined with the other providers attached to the same
 * test method.
 */
public static function provideGetTitleFromUrl_NoShortUrl() {
	// No short URL configuration
	$config = new HashConfig( [ 'ArticlePath' => '/wiki/index.php?title=$1' ] );

	// These should never occur in documents generated by either wikitext parser
	yield 'NoShortUrl-null-string' => [ null, 'Foo', $config ];
	yield 'NoShortUrl-null-path' => [ null, 'path/Foo', $config ];
	yield 'NoShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ];

	// Legacy wikitext parser
	yield 'NoShortUrl-simple-path' => [ 'Foo', '/wiki/index.php?title=Foo', $config ];
	yield 'NoShortUrl-viewing-path' => [ 'Foo', '/wiki/index.php?title=Foo&action=view', $config ];
	yield 'NoShortUrl-editing-path' => [ 'Foo', '/wiki/index.php?title=Foo&action=edit', $config ];

	// Parsoid parser
	yield 'NoShortUrl-parsoid-simple-path' => [ 'Foo', './index.php?title=Foo', $config ];
	yield 'NoShortUrl-parsoid-viewing-path' => [ 'Foo', './index.php?title=Foo&action=view', $config ];
	yield 'NoShortUrl-parsoid-editing-path' => [ 'Foo', './index.php?title=Foo&action=edit', $config ];

	// External link (matches regardless of domain - this may be unexpected)
	// Names previously reused the 'ShortUrl-' prefix, colliding with the other providers.
	yield 'NoShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/wiki/index.php?title=Foo', $config ];
	yield 'NoShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/wiki/index.php?title=Foo', $config ];
	yield 'NoShortUrl-external-null' => [ null, 'http://example.net/Foo', $config ];
}
}