[Cite] Generate the same ids for <ref>s and notes as Cite.php

One particular case is that Cite.php treats a name and its
entity-encoded form as equal, e.g. "a & b" === "a &amp; b". Added a
new test for this case, but blacklisted it on html2wt, wt2wt and
html2html due
to a different problem with how Parsoid encodes entities. This
will be investigated separately, as a simple fix could break
unrelated cases.

Also updated tests and blacklist to the new ids.

Change-Id: I87637a1dc812a3a8f29327b9e6c0040b22a651c4
This commit is contained in:
Marc Ordinas i Llopis 2014-12-20 13:49:12 +01:00
parent 3c82fb7787
commit cd8e50acb2

View file

@ -8,7 +8,8 @@ require('./core-upgrade.js');
var Util = require( './mediawiki.Util.js' ).Util,
DU = require( './mediawiki.DOMUtils.js').DOMUtils,
coreutil = require('util'),
defines = require('./mediawiki.parser.defines.js');
defines = require('./mediawiki.parser.defines.js'),
entities = require('entities');
// define some constructor shortcuts
var KV = defines.KV,
@ -154,8 +155,10 @@ function makeValidIdAttr(val) {
// Looks like Cite.php doesn't try to fix ids that already have
// a "_" in them. Ex: name="a b" and name="a_b" are considered
// identical. Not sure if this is a feature or a bug.
// It also encodes them and then substitutes '%' with '.'.
return encodeURIComponent(val.replace(/\s/g, '_')).replace(/%/g, '.');
// It also considers entities equal to their decoded form (e.g. '&amp;' === '&'),
// so decode entities first, then URI-encode the result and substitute '%' with '.'.
var v = entities.decodeHTML(val).replace(/\s/g, '_');
return encodeURIComponent(v).replace(/%/g,".");
}
RefGroup.prototype.add = function(references, refName, about, skipLinkback) {
@ -164,25 +167,30 @@ RefGroup.prototype.add = function(references, refName, about, skipLinkback) {
if ( refName && this.indexByName.has( refName ) ) {
ref = this.indexByName.get( refName );
} else {
// The ids produced by Cite.php have some particulars:
// Simple refs get 'cite_ref-' + index
// Refs with names get 'cite_ref-' + name + '_' + index + '-' + (backlink num || 0)
// Notes (references) whose ref doesn't have a name are 'cite_note-' + index
// Notes whose ref has a name are 'cite_note-' + name + '-' + index
var n = references.index,
refKey = (1+n) + '';
refKey = (1+n) + '',
refIdBase = 'cite_ref-' + (refName ? refName + '_' + refKey : refKey),
noteId = 'cite_note-' + (refName ? refName + '-' + refKey : refKey);
// bump index
references.index += 1;
if (refName) {
refKey = refName + '-' + refKey;
}
ref = {
about: about,
content: null,
group: this.name,
groupIndex: this.refs.length + 1,
index: n,
key: refKey,
key: refIdBase,
id: (refName ? refIdBase + '-0' : refIdBase),
linkbacks: [],
name: refName,
target: 'cite_note-' + refKey
target: noteId
};
this.refs.push( ref );
if (refName) {
@ -191,7 +199,7 @@ RefGroup.prototype.add = function(references, refName, about, skipLinkback) {
}
if (!skipLinkback) {
ref.linkbacks.push('cite_ref-' + ref.key + '-' + ref.linkbacks.length);
ref.linkbacks.push(ref.key + '-' + ref.linkbacks.length);
}
return ref;
@ -222,7 +230,7 @@ RefGroup.prototype.renderLine = function(refsList, ref) {
if (ref.linkbacks.length === 1) {
a = ownerDoc.createElement('a');
DU.addAttributes(a, {
'href': '#' + ref.linkbacks[0]
'href': '#' + ref.id
});
a.appendChild(arrow);
span.appendChild(a);
@ -387,7 +395,8 @@ References.prototype.extractRefFromNode = function(node, refInRefProcessor, refe
DU.addAttributes(span, {
'about': about,
'class': 'reference',
'id': nestedInReferences ? undefined : ref.linkbacks[ref.linkbacks.length - 1],
'id': nestedInReferences ? undefined :
(ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id),
'rel': 'dc:references',
'typeof': nodeType
});