Improve unwrapFragment() for multi-paragraph comments

I've been reading some discussions in Special:DiscussionToolsDebug
and noticed a good number of comments where the first paragraph has
a bullet point (`*`), and the following paragraphs are indented (`:`).
Unwrap such fragments as well, so that they become plain paragraphs.

Change-Id: I1afadc49565e843c97286a3744941ffb4062ddec
This commit is contained in:
Bartosz Dziewoński 2024-06-20 17:22:03 +02:00
parent aff2c0be56
commit 674353691b
2 changed files with 24 additions and 1 deletions

View file

@ -346,7 +346,13 @@ class CommentModifier {
while (
static::allOfType( $fragment->childNodes, 'dl' ) ||
static::allOfType( $fragment->childNodes, 'ul' ) ||
static::allOfType( $fragment->childNodes, 'ol' )
static::allOfType( $fragment->childNodes, 'ol' ) ||
(
// Or if the comment starts with a bullet followed by indents
count( $fragment->childNodes ) > 1 &&
static::allOfType( [ $fragment->childNodes[0] ], 'ul' ) &&
static::allOfType( array_slice( iterator_to_array( $fragment->childNodes ), 1 ), 'dl' )
)
) {
// Do not iterate over childNodes while we're modifying it
$childNodeList = iterator_to_array( $fragment->childNodes );

View file

@ -124,6 +124,23 @@ class CommentModifierTest extends IntegrationTestCase {
return static::getJson( '../cases/unwrap.json' );
}
/**
 * Check that CommentModifier::unwrapFragment() turns the parsed HTML
 * fragment into the expected HTML.
 *
 * @dataProvider provideUnwrapFragment
 * @param string $html HTML parsed into the fragment passed to unwrapFragment()
 * @param string $expected Expected inner HTML of the fragment afterwards
 */
public function testUnwrapFragment( string $html, string $expected ): void {
	$doc = static::createDocument( '' );
	$container = DOMUtils::parseHTMLToFragment( $doc, $html );
	// The fragment is modified in place; it is serialized again below
	CommentModifier::unwrapFragment( $container );
	// Both sides are strings: compare strictly, so that numeric-looking
	// strings can never be considered equal through type juggling
	static::assertSame( $expected, DOMUtils::getFragmentInnerHTML( $container ) );
}
/**
 * Data provider for testUnwrapFragment().
 *
 * @return iterable<string,string[]> Each case is [ input HTML, expected HTML ]
 */
public static function provideUnwrapFragment(): iterable {
	// Named dataset, so that a failure identifies the case rather than an index
	yield 'bullet point followed by an indented paragraph' => [
		"<ul><li>aaa</li></ul>\n<dl><dd>bbb</dd></dl>",
		"<p>aaa</p>\n<p>bbb</p>",
	];
}
/**
* @dataProvider provideAppendSignature
*/