Fix PHP getTitleFromUrl() when $wgArticlePath is '/$1'

In this case, the generated regexp would match the '/local' part in
the generated URL. Prefixing 'https://local' is no longer necessary
since 10899af666.

Add tests for this, and some tests to cover T261711 as well.

Bug: T358321
Change-Id: Idf54deba13f30b799b7b8d17de1897bc90f95701
(cherry picked from commit 6419c19d1e)
This commit is contained in:
Bartosz Dziewoński 2024-02-24 00:49:36 +01:00 committed by Alistair3149
parent 41fba9c7cc
commit 82803f9968
2 changed files with 124 additions and 11 deletions

View file

@ -6,6 +6,7 @@ use Config;
use LogicException;
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentCommentItem;
use MediaWiki\Extension\DiscussionTools\ThreadItem\ContentThreadItem;
use MediaWiki\MainConfigNames;
use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\DOM\Comment;
use Wikimedia\Parsoid\DOM\Element;
@ -483,11 +484,16 @@ class CommentUtils {
/**
* Get a MediaWiki page title from a URL
*
* @param string $url
* @param Config $config
* @param string $url Relative URL (from a `href` attribute)
* @param Config $config Config settings needed to resolve the relative URL
* @return string|null
*/
public static function getTitleFromUrl( string $url, Config $config ): ?string {
// Protocol-relative URLs are handled really badly by parse_url()
if ( str_starts_with( $url, '//' ) ) {
$url = "http:$url";
}
$bits = parse_url( $url );
$query = wfCgiToArray( $bits['query'] ?? '' );
if ( isset( $query['title'] ) ) {
@ -495,19 +501,26 @@ class CommentUtils {
}
// TODO: Set the correct base in the document?
if ( strpos( $url, './' ) === 0 ) {
$url = 'https://local' . str_replace( '$1', substr( $url, 2 ), $config->get( 'ArticlePath' ) );
} elseif ( strpos( $url, '://' ) === false ) {
$url = 'https://local' . $url;
$articlePath = $config->get( MainConfigNames::ArticlePath );
if ( str_starts_with( $url, './' ) ) {
// Assume this is URL in the format used by Parsoid documents
$url = substr( $url, 2 );
$path = str_replace( '$1', $url, $articlePath );
} elseif ( !str_contains( $url, '://' ) ) {
// Assume this is URL in the format used by legacy parser documents
$path = $url;
} else {
// External link
$path = $bits['path'] ?? '';
}
$articlePathRegexp = '/' . str_replace(
preg_quote( '$1', '/' ),
'(.*)',
preg_quote( $config->get( 'ArticlePath' ), '/' )
$articlePathRegexp = '/^' . str_replace(
'\\$1',
'([^?]*)',
preg_quote( $articlePath, '/' )
) . '/';
$matches = null;
if ( preg_match( $articlePathRegexp, $url, $matches ) ) {
if ( preg_match( $articlePathRegexp, $path, $matches ) ) {
return urldecode( $matches[1] );
}
return null;

View file

@ -2,6 +2,7 @@
namespace MediaWiki\Extension\DiscussionTools\Tests;
use HashConfig;
use MediaWiki\Extension\DiscussionTools\CommentUtils;
use MediaWiki\MediaWikiServices;
@ -35,4 +36,103 @@ class CommentUtilsTest extends IntegrationTestCase {
public function provideIsSingleCommentSignedBy(): array {
return static::getJson( '../cases/isSingleCommentSignedBy.json' );
}
/**
* @covers \MediaWiki\Extension\DiscussionTools\CommentUtils::getTitleFromUrl
* @dataProvider provideGetTitleFromUrl_ShortUrl
* @dataProvider provideGetTitleFromUrl_ConfusingShortUrl
* @dataProvider provideGetTitleFromUrl_NoShortUrl
*/
public function testGetTitleFromUrl( $expected, $input, $config ) {
static::assertEquals(
$expected,
CommentUtils::getTitleFromUrl( $input, $config )
);
}
public static function provideGetTitleFromUrl_ShortUrl() {
// Standard short URL configuration like on Wikimedia wikis
$config = new HashConfig( [ 'ArticlePath' => '/wiki/$1' ] );
// These should never occur in documents generated by either wikitext parser
yield 'ShortUrl-null-string' => [ null, 'Foo', $config ];
yield 'ShortUrl-null-path' => [ null, 'path/Foo', $config ];
yield 'ShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ];
// Legacy wikitext parser
yield 'ShortUrl-simple-path' => [ 'Foo', '/wiki/Foo', $config ];
yield 'ShortUrl-simple-cgi' => [ 'Foo', '/w/index.php?title=Foo', $config ];
yield 'ShortUrl-viewing-path' => [ 'Foo', '/wiki/Foo?action=view', $config ];
yield 'ShortUrl-viewing-cgi' => [ 'Foo', '/w/index.php?title=Foo&action=view', $config ];
yield 'ShortUrl-editing-path' => [ 'Foo', '/wiki/Foo?action=edit', $config ];
yield 'ShortUrl-editing-cgi' => [ 'Foo', '/w/index.php?title=Foo&action=edit', $config ];
yield 'ShortUrl-repeated question-mark' => [ 'Foo', '/wiki/Foo?Gosh?This?Path?Is?Bad', $config ];
// Parsoid parser
yield 'ShortUrl-parsoid-simple-path' => [ 'Foo', './Foo', $config ];
yield 'ShortUrl-parsoid-viewing-path' => [ 'Foo', './Foo?action=view', $config ];
yield 'ShortUrl-parsoid-editing-path' => [ 'Foo', './Foo?action=edit', $config ];
// External link (matches regardless of domain - this may be unexpected)
yield 'ShortUrl-external-path1' => [ 'Foo', 'http://example.com/wiki/Foo', $config ];
yield 'ShortUrl-external-path2' => [ 'Foo', 'http://example.org/wiki/Foo', $config ];
yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/w/index.php?title=Foo', $config ];
yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/w/index.php?title=Foo', $config ];
yield 'ShortUrl-external-null' => [ null, 'http://example.net/Foo', $config ];
}
public static function provideGetTitleFromUrl_ConfusingShortUrl() {
// Super short URL that is confusing for the software but people use it anyway
$config = new HashConfig( [ 'ArticlePath' => '/$1' ] );
// These should never occur in documents generated by either wikitext parser
yield 'ConfusingShortUrl-null-string' => [ null, 'Foo', $config ];
yield 'ConfusingShortUrl-null-path' => [ null, 'path/Foo', $config ];
yield 'ConfusingShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ];
// Legacy wikitext parser
yield 'ConfusingShortUrl-simple-path' => [ 'Foo', '/Foo', $config ];
yield 'ConfusingShortUrl-simple-cgi' => [ 'Foo', '/index.php?title=Foo', $config ];
yield 'ConfusingShortUrl-viewing-path' => [ 'Foo', '/Foo?action=view', $config ];
yield 'ConfusingShortUrl-viewing-cgi' => [ 'Foo', '/index.php?title=Foo&action=view', $config ];
yield 'ConfusingShortUrl-editing-path' => [ 'Foo', '/Foo?action=edit', $config ];
yield 'ConfusingShortUrl-editing-cgi' => [ 'Foo', '/index.php?title=Foo&action=edit', $config ];
yield 'ConfusingShortUrl-repeated question-mark' => [ 'Foo', '/Foo?Gosh?This?Path?Is?Bad', $config ];
// Parsoid parser
yield 'ConfusingShortUrl-parsoid-simple-path' => [ 'Foo', './Foo', $config ];
yield 'ConfusingShortUrl-parsoid-viewing-path' => [ 'Foo', './Foo?action=view', $config ];
yield 'ConfusingShortUrl-parsoid-editing-path' => [ 'Foo', './Foo?action=edit', $config ];
// External link (matches regardless of domain - this may be unexpected)
yield 'ShortUrl-external-path1' => [ 'Foo', 'http://example.com/Foo', $config ];
yield 'ShortUrl-external-path2' => [ 'Foo', 'http://example.org/Foo', $config ];
yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/index.php?title=Foo', $config ];
yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/index.php?title=Foo', $config ];
}
public static function provideGetTitleFromUrl_NoShortUrl() {
// No short URL configuration
$config = new HashConfig( [ 'ArticlePath' => '/wiki/index.php?title=$1' ] );
// These should never occur in documents generated by either wikitext parser
yield 'NoShortUrl-null-string' => [ null, 'Foo', $config ];
yield 'NoShortUrl-null-path' => [ null, 'path/Foo', $config ];
yield 'NoShortUrl-null-wiki-path' => [ null, 'wiki/Foo', $config ];
// Legacy wikitext parser
yield 'NoShortUrl-simple-path' => [ 'Foo', '/wiki/index.php?title=Foo', $config ];
yield 'NoShortUrl-viewing-path' => [ 'Foo', '/wiki/index.php?title=Foo&action=view', $config ];
yield 'NoShortUrl-editing-path' => [ 'Foo', '/wiki/index.php?title=Foo&action=edit', $config ];
// Parsoid parser
yield 'NoShortUrl-parsoid-simple-path' => [ 'Foo', './index.php?title=Foo', $config ];
yield 'NoShortUrl-parsoid-viewing-path' => [ 'Foo', './index.php?title=Foo&action=view', $config ];
yield 'NoShortUrl-parsoid-editing-path' => [ 'Foo', './index.php?title=Foo&action=edit', $config ];
// External link (matches regardless of domain - this may be unexpected)
yield 'ShortUrl-external-cgi1' => [ 'Foo', 'http://example.com/wiki/index.php?title=Foo', $config ];
yield 'ShortUrl-external-cgi2' => [ 'Foo', 'http://example.org/wiki/index.php?title=Foo', $config ];
yield 'ShortUrl-external-null' => [ null, 'http://example.net/Foo', $config ];
}
}