Normalize ref html before comparison

If the HTML generated for a ref contains attributes such as data-parsoid
or about, the strict string comparison reports a mismatch, and two
references that have the same content end up being treated as different.
This patch filters out data-parsoid and about attributes from the
generated HTML.

Bug: T380152
Change-Id: I4c201c469d21ff9cb54f746386ce53b8d636361d
This commit is contained in:
Isabelle Hurbain-Palatin 2024-11-18 16:56:03 +01:00
parent 87ae1a1de0
commit 91316d49c2

View file

@ -10,6 +10,7 @@ use Cite\Cite;
use Cite\MarkSymbolRenderer;
use Closure;
use MediaWiki\Config\Config;
use MediaWiki\HtmlHelper;
use MediaWiki\MediaWikiServices;
use stdClass;
use Wikimedia\Message\MessageValue;
@ -27,6 +28,7 @@ use Wikimedia\Parsoid\NodeData\DataMw;
use Wikimedia\Parsoid\NodeData\DataMwError;
use Wikimedia\Parsoid\NodeData\DataParsoid;
use Wikimedia\Parsoid\Utils\DOMCompat;
use Wikimedia\RemexHtml\Serializer\SerializerNode;
/**
* @license GPL-2.0-or-later
@ -226,7 +228,7 @@ class References extends ExtensionTagHandler {
// Ideally, we should strip the mw:Cite/Follow wrappers before comparing
// But, we are going to ignore this edge case as not worth the complexity.
$html = $extApi->domToHtml( $c, true, false );
$contentDiffers = ( $html !== $ref->cachedHtml );
$contentDiffers = ( $this->normalizeRef( $html ) !== $this->normalizeRef( $ref->cachedHtml ) );
}
} else {
if ( $refsData->inReferencesContent() ) {
@ -878,4 +880,17 @@ class References extends ExtensionTagHandler {
return false;
}
/**
 * Return the given HTML with the `data-parsoid` and `about` attributes
 * removed from every element that carries at least one of them, so that
 * two ref HTML strings can be compared without those attributes.
 *
 * @param string $s HTML string to normalize
 * @return string Result of HtmlHelper::modifyElements() for $s
 */
private function normalizeRef( string $s ): string {
	// Selects the elements that need rewriting: those having either attribute.
	$needsStripping = static function ( SerializerNode $node ): bool {
		return isset( $node->attrs['data-parsoid'] ) || isset( $node->attrs['about'] );
	};

	// Drops both attributes from a selected element and hands the node back.
	$stripAttributes = static function ( SerializerNode $node ): SerializerNode {
		unset( $node->attrs['data-parsoid'], $node->attrs['about'] );
		return $node;
	};

	return HtmlHelper::modifyElements( $s, $needsStripping, $stripAttributes );
}
}