(Bug 67237): Fix citation numbering issue

* Process <ref> and <references> tags on the top-level DOM only
  and skip the generateRefs pass when processing other content.

* This required a few fixes:
  - ensure that DOMPostProcessor knows about the top-level.
  - ensure that DOMVisitor knows about the top-level.
  - the cleanup pass leaves behind the ref-marker metas in DOMs of
    non-top-level content.
  - process nested references content.

* One of the references tests had incorrect parsed output. That test
  has been updated to reflect the correct output from this patch.

* Barack Obama seems to now have the correct numbering on references.

Change-Id: I5465721d2fc715f2168f267e773a446bc37d198b
This commit is contained in:
Subramanya Sastry 2014-07-01 13:04:31 -05:00
parent e59aaecbcf
commit 69b6ec4d4d

View file

@ -82,8 +82,7 @@ Ref.prototype.handleRef = function ( manager, pipelineOpts, refTok, cb ) {
return;
}
var inReferencesExt = pipelineOpts.extTag === "references",
refOpts = Object.assign({
var refOpts = Object.assign({
name: null, group: null
}, Util.KVtoHash(refTok.getAttribute("options"), true)),
about = manager.env.newAboutId(),
@ -92,14 +91,9 @@ Ref.prototype.handleRef = function ( manager, pipelineOpts, refTok, cb ) {
var da = Util.clone(refTok.dataAttribs);
// Clear stx='html' so that sanitizer doesn't barf
da.stx = undefined;
if (!da.tmp) {
da.tmp = {};
}
da.tmp.group = refOpts.group || '';
da.tmp.name = refOpts.name || '';
da.tmp.content = content || '';
da.tmp.skiplinkback = inReferencesExt ? 1 : 0;
da.group = refOpts.group || '';
da.name = refOpts.name || '';
da.content = content || '';
toks.push(new SelfclosingTagTk( 'meta', [
new KV('typeof', 'mw:Extension/ref/Marker'),
@ -243,21 +237,6 @@ References.prototype.reset = function( group, resetIndex ) {
this.refGroups.delete(group);
} else {
this.refGroups = new Map();
/* -----------------------------------------------------------------
* Map: references-about-id --> HTML of any nested refs
*
* Ex: Given this wikitext:
*
* <references> <ref>foo</ref> </references>
* <references> <ref>bar</ref> </references>
*
* during processing, each of the references tag gets an about-id
* assigned to it. The ref-tags nested inside it have a data-attribute
* with the references about-id. When processing the ref-tokens and
* generating the HTML, we then collect the HTML for each nested
* ref-token and add it to this map by about-id.
* ----------------------------------------------------------------- */
this.nestedRefsHTMLMap = new Map();
}
// restart reference counter
@ -334,20 +313,23 @@ References.prototype.handleReferences = function ( manager, pipelineOpts, refsTo
});
};
References.prototype.extractRefFromNode = function(node, pipelineOpts) {
var dp = DU.getDataParsoid( node ),
group = dp.tmp.group,
refName = dp.tmp.name,
References.prototype.extractRefFromNode = function(node, referencesAboutId, referencesGroup, nestedRefsHTML) {
var nestedInRefs = referencesAboutId !== undefined,
dp = DU.getDataParsoid( node ),
// SSS FIXME: Need to clarify semantics here.
// If both the containing <references> elt as well as the nested <ref> elt has
// a group attribute, what takes precedence?
group = dp.group || referencesGroup || '',
refName = dp.name,
about = node.getAttribute("about"),
skipLinkback = dp.tmp.skiplinkback,
refGroup = getRefGroup(this.refGroups, group, true),
ref = refGroup.add(this, refName, about, skipLinkback),
ref = refGroup.add(this, refName, about, nestedInRefs),
nodeType = (node.getAttribute("typeof") || '').replace(/mw:Extension\/ref\/Marker/, '');
// Add ref-index linkback
var doc = node.ownerDocument,
span = doc.createElement('span'),
content = dp.tmp.content,
content = dp.content,
dataMW = node.getAttribute('data-mw');
if (!dataMW) {
@ -357,8 +339,11 @@ References.prototype.extractRefFromNode = function(node, pipelineOpts) {
// like <ref name='..' /> with empty content.
'body': content ? { 'html': content } : undefined,
'attrs': {
// Dont emit empty keys
'group': group || undefined,
// 1. Use 'dp.group' (which is the group attribute that the ref node had)
// rather than use 'group' (which could be the group from an enclosing
// <references> tag).
// 2. Dont emit empty keys
'group': dp.group || undefined,
'name': refName || undefined
}
});
@ -368,7 +353,7 @@ References.prototype.extractRefFromNode = function(node, pipelineOpts) {
'about': about,
'class': 'reference',
'data-mw': dataMW,
'id': skipLinkback ? undefined : ref.linkbacks[ref.linkbacks.length - 1],
'id': nestedInRefs ? undefined : ref.linkbacks[ref.linkbacks.length - 1],
'rel': 'dc:references',
'typeof': nodeType
});
@ -388,27 +373,21 @@ References.prototype.extractRefFromNode = function(node, pipelineOpts) {
));
span.appendChild(refIndex);
if (!skipLinkback) {
// refIndex-span
if (!nestedInRefs) {
node.parentNode.insertBefore(span, node);
} else {
var referencesAboutId = pipelineOpts.extTagId;
// Init
if ( !this.nestedRefsHTMLMap.has( referencesAboutId ) ) {
this.nestedRefsHTMLMap.set( referencesAboutId, ["\n"] );
}
this.nestedRefsHTMLMap.get( referencesAboutId ).push( DU.serializeNode( span ), "\n" );
nestedRefsHTML.push(DU.serializeNode(span), "\n");
}
// This effectively ignores content from later references with the same name.
// The implicit assumption is that all those identically named refs are
// of the form <ref name='foo' />
if (!ref.content) {
ref.content = dp.tmp.content;
ref.content = dp.content;
}
};
References.prototype.insertReferencesIntoDOM = function(refsNode) {
References.prototype.insertReferencesIntoDOM = function(refsNode, nestedRefsHTML) {
var about = refsNode.getAttribute('about'),
dp = DU.getDataParsoid( refsNode ),
group = dp.group || '',
@ -427,7 +406,7 @@ References.prototype.insertReferencesIntoDOM = function(refsNode) {
if (body.length > 0) {
datamwBody = {
'extsrc': body,
'html': ( this.nestedRefsHTMLMap.get( about ) || [] ).join('')
'html': nestedRefsHTML.join('')
};
}