Fix for Parsoid Cite refname whitespace handling

* Refnames such as 'a b' and 'a_b' are now kept separate like
   in Core Cite. Refnames with unicode whitespace characters
such as 'a\u2028b' are handled as distinct refnames from 'a b'
and their IDs are sanitized appropriately to have underscores.

Bug: T267974
Change-Id: Ie06d1f2b8614dbdcf8572ed4647ec9093ef006d5
This commit is contained in:
sbailey 2020-12-10 12:50:44 -08:00 committed by jenkins-bot
parent 97f965f9cb
commit 9679519b0a
2 changed files with 8 additions and 13 deletions

View file

@ -8,7 +8,6 @@ use DOMElement;
use DOMNode;
use stdClass;
use Wikimedia\Parsoid\Core\DomSourceRange;
use Wikimedia\Parsoid\Core\Sanitizer;
use Wikimedia\Parsoid\Ext\DOMDataUtils;
use Wikimedia\Parsoid\Ext\DOMUtils;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
@ -174,16 +173,6 @@ class References extends ExtensionTagHandler {
$followName = $refDmw->attrs->follow ?? '';
$refDir = strtolower( $refDmw->attrs->dir ?? '' );
// Looks like Cite.php doesn't try to fix ids that already have
// a "_" in them. Ex: name="a b" and name="a_b" are considered
// identical. Not sure if this is a feature or a bug.
// It also considers entities equal to their encoding
// (i.e. '&' === '&amp;'), which is done:
// in PHP: Sanitizer#decodeTagAttributes and
// in Parsoid: ExtensionHandler#normalizeExtOptions
$refName = Sanitizer::escapeIdForAttribute( $refName );
$followName = Sanitizer::escapeIdForAttribute( $followName );
// Add ref-index linkback
$linkBack = $doc->createElement( 'sup' );

View file

@ -5,6 +5,7 @@ namespace Wikimedia\Parsoid\Ext\Cite;
use DOMElement;
use stdClass;
use Wikimedia\Parsoid\Core\Sanitizer;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
class ReferencesData {
@ -110,8 +111,13 @@ class ReferencesData {
// Notes whose ref has a name are 'cite_note-' + name + '-' + index
$n = $this->index;
$refKey = strval( 1 + $n );
$refIdBase = 'cite_ref-' . ( $hasRefName ? $refName . '_' . $refKey : $refKey );
$noteId = 'cite_note-' . ( $hasRefName ? $refName . '-' . $refKey : $refKey );
// FIXME: normalizeKey in core performs a few more operations on $refName,
// which will be addressed in a follow up
$refNameSanitized = Sanitizer::escapeIdForAttribute( $refName );
$refIdBase = 'cite_ref-' . ( $hasRefName ? $refNameSanitized . '_' . $refKey : $refKey );
$noteId = 'cite_note-' . ( $hasRefName ? $refNameSanitized . '-' . $refKey : $refKey );
// bump index
$this->index += 1;