From cd8e50acb2496d011186e07f7337f74af69dd52a Mon Sep 17 00:00:00 2001 From: Marc Ordinas i Llopis Date: Sat, 20 Dec 2014 13:49:12 +0100 Subject: [PATCH] [Cite] Generate the same ids for refs and notes as Cite.php One particular case is that Cite.php considers equal a name and its encoding, i.e. "a & b" === "a &amp; b". Added a new test for this case, but blacklisted it on html2wt, wt2wt and html2html due to a different problem with how Parsoid encodes entities. This will be investigated separately, as a simple fix could break unrelated cases. Also updated tests and blacklist to the new ids. Change-Id: I87637a1dc812a3a8f29327b9e6c0040b22a651c4 --- lib/ext.Cite.js | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/lib/ext.Cite.js b/lib/ext.Cite.js index ffe92ffd6..7d1c704a3 100644 --- a/lib/ext.Cite.js +++ b/lib/ext.Cite.js @@ -8,7 +8,8 @@ require('./core-upgrade.js'); var Util = require( './mediawiki.Util.js' ).Util, DU = require( './mediawiki.DOMUtils.js').DOMUtils, coreutil = require('util'), - defines = require('./mediawiki.parser.defines.js'); + defines = require('./mediawiki.parser.defines.js'), + entities = require('entities'); // define some constructor shortcuts var KV = defines.KV, @@ -154,8 +155,10 @@ function makeValidIdAttr(val) { // Looks like Cite.php doesn't try to fix ids that already have // a "_" in them. Ex: name="a b" and name="a_b" are considered // identical. Not sure if this is a feature or a bug. - // It also encodes them and then substitutes '%' with '.'. - return encodeURIComponent(val.replace(/\s/g, '_')).replace(/%/g, '.'); + // It also considers entities equal to their encoding (i.e. '&' === '&amp;') + // and then substitutes % with . 
+ var v = entities.decodeHTML(val).replace(/\s/g, '_'); + return encodeURIComponent(v).replace(/%/g,"."); } RefGroup.prototype.add = function(references, refName, about, skipLinkback) { @@ -164,25 +167,30 @@ RefGroup.prototype.add = function(references, refName, about, skipLinkback) { if ( refName && this.indexByName.has( refName ) ) { ref = this.indexByName.get( refName ); } else { + // The ids produced by Cite.php have some particulars: + // Simple refs get 'cite_ref-' + index + // Refs with names get 'cite_ref-' + name + '_' + index + '-' + (backlink num || 0) + // Notes (references) whose ref doesn't have a name are 'cite_note-' + index + // Notes whose ref has a name are 'cite_note-' + name + '-' + index var n = references.index, - refKey = (1+n) + ''; + refKey = (1+n) + '', + refIdBase = 'cite_ref-' + (refName ? refName + '_' + refKey : refKey), + noteId = 'cite_note-' + (refName ? refName + '-' + refKey : refKey); // bump index references.index += 1; - if (refName) { - refKey = refName + '-' + refKey; - } ref = { about: about, content: null, group: this.name, groupIndex: this.refs.length + 1, index: n, - key: refKey, + key: refIdBase, + id: (refName ? 
refIdBase + '-0' : refIdBase), linkbacks: [], name: refName, - target: 'cite_note-' + refKey + target: noteId }; this.refs.push( ref ); if (refName) { @@ -191,7 +199,7 @@ RefGroup.prototype.add = function(references, refName, about, skipLinkback) { } if (!skipLinkback) { - ref.linkbacks.push('cite_ref-' + ref.key + '-' + ref.linkbacks.length); + ref.linkbacks.push(ref.key + '-' + ref.linkbacks.length); } return ref; @@ -222,7 +230,7 @@ RefGroup.prototype.renderLine = function(refsList, ref) { if (ref.linkbacks.length === 1) { a = ownerDoc.createElement('a'); DU.addAttributes(a, { - 'href': '#' + ref.linkbacks[0] + 'href': '#' + ref.id }); a.appendChild(arrow); span.appendChild(a); @@ -387,7 +395,8 @@ References.prototype.extractRefFromNode = function(node, refInRefProcessor, refe DU.addAttributes(span, { 'about': about, 'class': 'reference', - 'id': nestedInReferences ? undefined : ref.linkbacks[ref.linkbacks.length - 1], + 'id': nestedInReferences ? undefined : + (ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id), 'rel': 'dc:references', 'typeof': nodeType });