From 91316d49c248a32de9cb5dad67d8f52cf40094e3 Mon Sep 17 00:00:00 2001 From: Isabelle Hurbain-Palatin Date: Mon, 18 Nov 2024 16:56:03 +0100 Subject: [PATCH] Normalize ref html before comparison If the ref generated HTML contains attributes such as data-parsoid or about, the string comparison fails, and two references that have the same content end up being seen as different. This patch filters out data-parsoid and about attributes from the generated HTML. Bug: T380152 Change-Id: I4c201c469d21ff9cb54f746386ce53b8d636361d --- src/Parsoid/References.php | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Parsoid/References.php b/src/Parsoid/References.php index 046316cbc..ec1bfaaf6 100644 --- a/src/Parsoid/References.php +++ b/src/Parsoid/References.php @@ -10,6 +10,7 @@ use Cite\Cite; use Cite\MarkSymbolRenderer; use Closure; use MediaWiki\Config\Config; +use MediaWiki\HtmlHelper; use MediaWiki\MediaWikiServices; use stdClass; use Wikimedia\Message\MessageValue; @@ -27,6 +28,7 @@ use Wikimedia\Parsoid\NodeData\DataMw; use Wikimedia\Parsoid\NodeData\DataMwError; use Wikimedia\Parsoid\NodeData\DataParsoid; use Wikimedia\Parsoid\Utils\DOMCompat; +use Wikimedia\RemexHtml\Serializer\SerializerNode; /** * @license GPL-2.0-or-later @@ -226,7 +228,7 @@ class References extends ExtensionTagHandler { // Ideally, we should strip the mw:Cite/Follow wrappers before comparing // But, we are going to ignore this edge case as not worth the complexity. $html = $extApi->domToHtml( $c, true, false ); - $contentDiffers = ( $html !== $ref->cachedHtml ); + $contentDiffers = ( $this->normalizeRef( $html ) !== $this->normalizeRef( $ref->cachedHtml ) ); } } else { if ( $refsData->inReferencesContent() ) { @@ -878,4 +880,17 @@ class References extends ExtensionTagHandler { return false; } + + private function normalizeRef( string $s ): string { + return HtmlHelper::modifyElements( $s, + static function ( SerializerNode $node ): bool { + return isset( $node->attrs['data-parsoid'] ) || isset( $node->attrs['about'] ); + }, + static function ( SerializerNode $node ): SerializerNode { + unset( $node->attrs['data-parsoid'] ); + unset( $node->attrs['about'] ); + return $node; + } + ); + } }