mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Cite
synced 2024-11-25 07:15:31 +00:00
167dac7979
* Added mechanism to parse a set of tokens in its own parsing scope all the way to DOM (which in turn restricts token transformations to just that scope). This is equivalent to enforcing well-balanced requirements in restricted contexts (Ex: link content, image captions for starters). This also provides an option of enforcing balanced templates in certain contexts. This patch applies it to link content and image captions. Deleted the hacky closeUnclosedBlockTags code for dealing with bad HTML in captions. * Refactored common/duplicate code out of Cite handler and Template handler. * Updated DSR handling for dom-fragments which eliminates the warnings about cs/s dsr mismatches seen on image reuse. it:Dalmine used to get a bunch of DSR inconsistency warnings when dom-fragments were reused (reported in bug 53071) and are now fixed with this patch. * There is still one big hole in DOM fragment unpacking that has to be fixed. This pertains to improper tag nesting that will be broken up by the tree builder. The DOM fragment unpacker has to recognize these scenarios and fix up the DOM (either by fixing up the final DOM or by stripping mis-nested tags in the DOM fragment being unpacked). This patch has an incomplete hack for this that addresses the common-use case of nested a-tags generated by wikitext of this form: "[http://foo.bar a [[Wikilink] here]". Bug: 54454 Bug: 49942 Bug: 44476 Bug: 47326 Change-Id: I33ee38bc43743125b705ac821b339586593dbef7
495 lines
13 KiB
JavaScript
495 lines
13 KiB
JavaScript
/* ----------------------------------------------------------------------
|
|
* This file implements <ref> and <references> extension tag handling
|
|
* natively in Parsoid.
|
|
* ---------------------------------------------------------------------- */
|
|
"use strict";
|
|
|
|
var Util = require( './mediawiki.Util.js' ).Util,
|
|
DU = require( './mediawiki.DOMUtils.js').DOMUtils,
|
|
coreutil = require('util'),
|
|
defines = require('./mediawiki.parser.defines.js'),
|
|
$ = require( './fakejquery' );
|
|
|
|
// define some constructor shortcuts
|
|
var KV = defines.KV,
|
|
EOFTk = defines.EOFTk,
|
|
SelfclosingTagTk = defines.SelfclosingTagTk;
|
|
|
|
// FIXME: Move out to some common helper file?
/**
 * Helper function to process extension source.
 *
 * Cuts the text between the extension's opening and closing tags out of the
 * token's 'source' attribute, strips leading whitespace from it, and then
 * either reports empty content or pushes the content through
 * Util.processContentInPipeline.
 *
 * @param {Object} manager Handed through to Util.processContentInPipeline.
 * @param {Object} extToken Extension token. Must answer getAttribute('source')
 *   and carry dataAttribs.tagWidths and dataAttribs.tsr (two-element arrays).
 * @param {Object} opts Processing options. This object is mutated:
 *   pipelineOpts is replaced and srcOffsets is added before it is handed on.
 * @param {Array} opts.res Passed to whichever callback fires.
 * @param {Function} opts.emptyContentCB Called with opts.res when nothing but
 *   whitespace sits between the tags.
 * @param {Function} opts.parentCB Called with { tokens: opts.res, async: true }
 *   before non-empty content is processed.
 * @param {Object} [opts.pipelineOpts] Copied and extended with
 *   wrapTemplates: true.
 */
function processExtSource(manager, extToken, opts) {
	var extSrc = extToken.getAttribute('source'),
		tagWidths = extToken.dataAttribs.tagWidths,
		content = extSrc.substring(tagWidths[0], extSrc.length - tagWidths[1]);

	// FIXME: SSS: This stripping may be unnecessary after all.
	//
	// FIXME: Should this be specific to the extension
	// or is it okay to do this unconditionally for all?
	// Right now, this code is run only for ref and references,
	// so not a real problem, but if this is used on other extensions,
	// requires addressing.
	//
	// Strip all leading white-space. The regexp always matches, and its
	// second group always participates, so both captures are strings.
	var wsMatch = content.match(/^(\s*)([^]*)$/),
		leadingWS = wsMatch[1];

	// Update content to normalized form
	content = wsMatch[2];

	if (!content) {
		opts.emptyContentCB(opts.res);
		return;
	}

	// Pass an async signal since the ext-content is not processed completely.
	opts.parentCB({tokens: opts.res, async: true});

	// Wrap templates always
	opts.pipelineOpts = Util.extendProps({}, opts.pipelineOpts, { wrapTemplates: true });

	// Source offsets of the content: skip the opening tag and the stripped
	// whitespace at the front, and stop before the closing tag.
	var tsr = extToken.dataAttribs.tsr;
	opts.srcOffsets = [ tsr[0] + tagWidths[0] + leadingWS.length, tsr[1] - tagWidths[1] ];

	// Process ext content. The content is a wikitext string and must be passed
	// on unchanged: String.prototype.concat() would stringify a token array
	// and append that text to the wikitext.
	Util.processContentInPipeline(manager, content, opts);
}
|
|
|
|
/**
 * Simple token transform version of the Ref extension tag
 *
 * @class
 * @constructor
 * @param {Object} cite Owning Cite instance; stored as this.cite.
 */
function Ref(cite) {
	this.cite = cite;
	this.reset();
}

/**
 * Reset state before each top-level parse -- this lets us share a pipeline
 * to parse unrelated pages. Ref currently keeps no per-page state, so this
 * is a no-op.
 */
Ref.prototype.reset = function() { };

/**
 * Handle ref tokens.
 *
 * Processes the content of a <ref> tag via processExtSource and finally
 * hands `cb` a single 'mw:Extension/ref/Marker' meta token that carries the
 * ref's group, name and rendered content.
 *
 * @param {Object} manager Supplies env.newAboutId() and is passed on to
 *   processExtSource.
 * @param {Object} pipelineOpts Options of the enclosing pipeline; inTemplate
 *   and extTag are read.
 * @param {Object} refTok The <ref> extension token.
 * @param {Function} cb Receives { tokens, async } result objects.
 */
Ref.prototype.handleRef = function ( manager, pipelineOpts, refTok, cb ) {
	// Nested <ref> tags at the top level are considered errors
	// But, inside templates, they are supported
	if (!pipelineOpts.inTemplate && pipelineOpts.extTag === "ref") {
		// Emit the ref's source text as-is
		cb({ tokens: [refTok.getAttribute("source")] });
		return;
	}

	var inReferencesExt = pipelineOpts.extTag === "references",
		refOpts = $.extend({ name: null, group: null }, Util.KVtoHash(refTok.getAttribute("options"))),
		about = manager.env.newAboutId(),
		finalCB = function(toks, content) {
			// Marker meta with ref content
			var da = Util.clone(refTok.dataAttribs);
			// Clear stx='html' so that sanitizer doesn't barf
			da.stx = undefined;

			toks.push(new SelfclosingTagTk( 'meta', [
				new KV('typeof', 'mw:Extension/ref/Marker'),
				new KV('about', about),
				new KV('group', refOpts.group || ''),
				new KV('name', refOpts.name || ''),
				new KV('content', content || ''),
				// Refs nested in <references> do not get a linkback
				new KV('skiplinkback', inReferencesExt ? 1 : 0)
			], da));

			// All done!
			cb({tokens: toks, async: false});
		};

	processExtSource(manager, refTok, {
		// Full pipeline for processing ref-content
		pipelineType: 'text/x-mediawiki/full',
		pipelineOpts: {
			inTemplate: pipelineOpts.inTemplate,
			noPre: true,
			extTag: "ref"
		},
		res: [],
		parentCB: cb,
		// An empty ref still produces a marker, just without content
		emptyContentCB: finalCB,
		documentCB: function(refContentDoc) {
			finalCB([], refContentDoc.body.innerHTML);
		}
	});
};
|
|
|
|
/**
 * Helper class used by <references> implementation.
 *
 * Collects the refs of one group in order of first appearance and indexes
 * the named ones.
 *
 * @class
 * @constructor
 * @param {string} [group] Group name; falsy values become ''.
 */
function RefGroup(group) {
	this.name = group || '';
	this.refs = [];
	this.indexByName = {};
}

/**
 * Register a ref occurrence in this group.
 *
 * A named ref that was seen before is reused; otherwise a new record is
 * appended to this.refs. Unless skipLinkback is set, a new linkback id is
 * recorded on the ref.
 *
 * @param {string} refName Name of the ref; falsy for anonymous refs.
 * @param {string} about About-id stored on a newly created ref record.
 * @param {boolean} skipLinkback When truthy, no linkback id is added.
 * @return {Object} The (new or reused) ref record.
 */
RefGroup.prototype.add = function(refName, about, skipLinkback) {
	// NOTE: prefix name with "ref:" before using it as a property key
	// This is to avoid overwriting predefined keys like 'constructor'
	var ref, indexKey = "ref:" + refName;

	if (refName && this.indexByName[indexKey]) {
		ref = this.indexByName[indexKey];
	} else {
		var n = this.refs.length,
			refKey = (1 + n) + '';

		if (refName) {
			refKey = refName + '-' + refKey;
		}

		ref = {
			about: about,
			content: null,
			group: this.name,
			groupIndex: (1 + n), // FIXME -- this seems to be wiki-specific
			index: n,
			key: refKey,
			linkbacks: [],
			name: refName,
			target: 'cite_note-' + refKey
		};
		this.refs[n] = ref;
		if (refName) {
			this.indexByName[indexKey] = ref;
		}
	}

	if (!skipLinkback) {
		ref.linkbacks.push('cite_ref-' + ref.key + '-' + ref.linkbacks.length);
	}

	return ref;
};

/**
 * Render one ref as an <li> and append it to the references list.
 *
 * The <li> holds a 'mw:referencedBy' span with the linkback anchor(s),
 * followed by a space and the ref's content.
 *
 * @param {Node} refsList List element that receives the new <li>.
 * @param {Object} ref Ref record as produced by RefGroup#add.
 */
RefGroup.prototype.renderLine = function(refsList, ref) {
	var ownerDoc = refsList.ownerDocument,
		arrow = ownerDoc.createTextNode('↑'),
		li, a;

	// Generate the li and set ref content first, so the HTML gets parsed.
	// We then append the rest of the ref nodes before the first node
	li = ownerDoc.createElement('li');
	DU.addAttributes(li, {
		'about': "#" + ref.target,
		'id': ref.target
	});
	// ref.content stays null for a named ref that never got a body;
	// never hand null to innerHTML.
	li.innerHTML = ref.content || '';

	var contentNode = li.firstChild;

	// 'mw:referencedBy' span wrapper
	var span = ownerDoc.createElement('span');
	span.setAttribute('rel', 'mw:referencedBy');
	li.insertBefore(span, contentNode);

	// Generate leading linkbacks
	if (ref.linkbacks.length === 1) {
		// Single linkback: the arrow itself is the link
		a = ownerDoc.createElement('a');
		DU.addAttributes(a, {
			'href': '#' + ref.linkbacks[0]
		});
		a.appendChild(arrow);
		span.appendChild(a);
	} else {
		// Several (or no) linkbacks: plain arrow followed by numbered links
		span.appendChild(arrow);
		ref.linkbacks.forEach(function(linkback, i) {
			a = ownerDoc.createElement('a');
			DU.addAttributes(a, {
				'href': '#' + linkback
			});
			a.appendChild(ownerDoc.createTextNode(ref.groupIndex + '.' + i));
			// Separate linkbacks with a space
			span.appendChild(ownerDoc.createTextNode(' '));
			span.appendChild(a);
		});
	}

	// Space before content node
	li.insertBefore(ownerDoc.createTextNode(' '), contentNode);

	// Add it to the ref list
	refsList.appendChild(li);
};
|
|
|
|
/**
 * Store a ref group under its name.
 *
 * NOTE: prefix name with "refgroup:" before using it as a property key
 * This is to avoid overwriting predefined keys like 'constructor'
 *
 * @param {Object} refGroups Map of all groups; modified in place.
 * @param {string} groupName Unprefixed group name.
 * @param {Object|undefined} group Value to store; undefined clears the slot.
 */
function setRefGroup(refGroups, groupName, group) {
	refGroups["refgroup:" + groupName] = group;
}
|
|
|
|
/**
 * Look up a ref group by name.
 *
 * @param {Object} refGroups Map of all groups, keyed with the "refgroup:"
 *   prefix.
 * @param {string} [groupName] Unprefixed group name; falsy values select
 *   the '' (default) group.
 * @param {boolean} [allocIfMissing] When truthy, a missing group is created
 *   and stored before it is returned.
 * @return {RefGroup|undefined} The group, or undefined when it does not
 *   exist and was not allocated.
 */
function getRefGroup(refGroups, groupName, allocIfMissing) {
	groupName = groupName || '';

	var key = "refgroup:" + groupName;
	if (!refGroups[key] && allocIfMissing) {
		setRefGroup(refGroups, groupName, new RefGroup(groupName));
	}

	return refGroups[key];
}
|
|
|
|
/**
 * Token and DOM handling for the <references> extension tag.
 *
 * @class
 * @constructor
 * @param {Object} cite Owning Cite instance; stored as this.cite.
 */
function References(cite) {
	this.cite = cite;
	this.reset();
}

/**
 * Reset collected reference state.
 *
 * @param {string} [group] When given (including '' for the default group),
 *   only that group is cleared. When omitted (undefined/null), all groups
 *   and the nested-refs HTML map are reset.
 */
References.prototype.reset = function(group) {
	// Test for "argument given" rather than truthiness: '' names the default
	// group, and clearing it must not wipe refs collected for other groups.
	if (group !== undefined && group !== null) {
		setRefGroup(this.refGroups, group, undefined);
	} else {
		this.refGroups = {};
		/* -----------------------------------------------------------------
		 * Map: references-about-id --> HTML of any nested refs
		 *
		 * Ex: Given this wikitext:
		 *
		 *   <references> <ref>foo</ref> </references>
		 *   <references> <ref>bar</ref> </references>
		 *
		 * during processing, each of the references tag gets an about-id
		 * assigned to it. The ref-tags nested inside it have a data-attribute
		 * with the references about-id. When processing the ref-tokens and
		 * generating the HTML, we then collect the HTML for each nested
		 * ref-token and add it to this map by about-id.
		 * ----------------------------------------------------------------- */
		this.nestedRefsHTMLMap = {};
	}
};

/**
 * Handle a <references> token.
 *
 * Processes the tag's content via processExtSource, forwards only the ref
 * marker tokens found in it (tagged with this tag's about-id), and finally
 * emits a DOM fragment for an empty <ol> that marks where the references
 * list is to be generated.
 *
 * @param {Object} manager Supplies env and is passed on to processExtSource.
 * @param {Object} pipelineOpts Options of the enclosing pipeline;
 *   wrapTemplates is read.
 * @param {Object} refsTok The <references> extension token (cloned before
 *   use).
 * @param {Function} cb Receives { tokens, async } result objects.
 */
References.prototype.handleReferences = function ( manager, pipelineOpts, refsTok, cb ) {
	refsTok = refsTok.clone();

	// group is the only recognized option?
	var refsOpts = Util.KVtoHash(refsTok.getAttribute("options")),
		group = refsOpts.group;

	if ( group && group.constructor === Array ) {
		// Array of tokens, convert to string.
		group = Util.tokensToString(group);
	}

	// Point invalid / empty groups to null
	if ( !group ) {
		group = null;
	}

	// Assign an about id and initialize the nested refs html
	var referencesId = manager.env.newAboutId();

	// Emit a marker mw:DOMFragment for the references
	// token so that the dom post processor can generate
	// and emit references at this point in the DOM.
	var emitReferencesFragment = function() {
		var olHTML = "<ol class='references'" +
			" typeof='mw:Extension/references'" +
			" about='" + referencesId + "'" +
			"></ol>";

		// Record the tag source (and group, if any) in the ol's data-parsoid
		var olProcessor = function(ol) {
			var dp = DU.getJSONAttribute(ol, "data-parsoid", {});
			dp.src = refsTok.getAttribute('source');
			if (group) {
				dp.group = group;
			}
			DU.setJSONAttribute(ol, "data-parsoid", dp);
		};

		cb({
			tokens: DU.buildDOMFragmentForTokenStream(
				refsTok,
				olHTML,
				manager.env,
				olProcessor,
				referencesId
			),
			async: false
		});
	};

	processExtSource(manager, refsTok, {
		// Partial pipeline for processing ref-content
		// Expand till stage 2 so that all embedded
		// ref tags get processed
		pipelineType: 'text/x-mediawiki',
		pipelineOpts: {
			extTag: "references",
			wrapTemplates: pipelineOpts.wrapTemplates
		},
		res: [],
		parentCB: cb,
		emptyContentCB: emitReferencesFragment,
		chunkCB: function(chunk) {
			// Extract ref-content tokens and discard the rest
			var refToks = chunk.filter(function(t) {
				return t.constructor === SelfclosingTagTk &&
					t.name === 'meta' &&
					/^mw:Extension\/ref\/Marker$/.test(t.getAttribute('typeof'));
			});

			// Tie each nested ref to this references tag
			refToks.forEach(function(t) {
				t.setAttribute("references-id", referencesId);
			});

			// Pass along the ref toks
			cb({ tokens: refToks, async: true });
		},
		endCB: emitReferencesFragment
	});
};

/**
 * Turn a ref marker node into its rendered form and record the ref.
 *
 * Registers the ref with its group, builds the linkback span
 * ('<span class="reference"><a href="#cite_note-..">[n]</a></span>') and
 * either inserts it before the marker node or, for refs nested inside
 * <references>, stores its HTML in nestedRefsHTMLMap.
 *
 * @param {Node} node 'mw:Extension/ref/Marker' meta node.
 */
References.prototype.extractRefFromNode = function(node) {
	var group = node.getAttribute("group"),
		refName = node.getAttribute("name"),
		about = node.getAttribute("about"),
		skipLinkback = node.getAttribute("skiplinkback") === "1",
		refGroup = getRefGroup(this.refGroups, group, true),
		ref = refGroup.add(refName, about, skipLinkback),
		// Whatever types the marker carried besides the marker type itself
		nodeType = (node.getAttribute("typeof") || '').replace(/mw:Extension\/ref\/Marker/, '');

	// Add ref-index linkback
	var doc = node.ownerDocument,
		span = doc.createElement('span'),
		content = node.getAttribute("content"),
		dataMW = node.getAttribute('data-mw');

	if (!dataMW) {
		// JSON.stringify drops the keys whose value is undefined
		dataMW = JSON.stringify({
			'name': 'ref',
			// Don't set body if this is a reused reference
			// like <ref name='..' /> with empty content.
			'body': content ? { 'html': content } : undefined,
			'attrs': {
				// Don't emit empty keys
				'group': group || undefined,
				'name': refName || undefined
			}
		});
	}

	DU.addAttributes(span, {
		'about': about,
		'class': 'reference',
		'data-mw': dataMW,
		'id': skipLinkback ? undefined : ref.linkbacks[ref.linkbacks.length - 1],
		'rel': 'dc:references',
		'typeof': nodeType
	});
	DU.addTypeOf(span, "mw:Extension/ref");
	span.data = {
		parsoid: {
			src: node.data.parsoid.src,
			dsr: node.data.parsoid.dsr
		}
	};

	// refIndex-a
	// A missing group attribute is treated like the default group (as it is
	// everywhere else in this method) instead of being printed as "null".
	var refIndex = doc.createElement('a');
	refIndex.setAttribute('href', '#' + ref.target);
	refIndex.appendChild(doc.createTextNode(
		'[' + (group ? group + ' ' : '') + ref.groupIndex + ']'
	));
	span.appendChild(refIndex);

	if (!skipLinkback) {
		// refIndex-span
		node.parentNode.insertBefore(span, node);
	} else {
		var referencesAboutId = node.getAttribute("references-id");
		// Init
		if (!this.nestedRefsHTMLMap[referencesAboutId]) {
			this.nestedRefsHTMLMap[referencesAboutId] = ["\n"];
		}
		this.nestedRefsHTMLMap[referencesAboutId].push(span.outerHTML, "\n");
	}

	// This effectively ignores content from later references with the same name.
	// The implicit assumption is that all those identically named refs are
	// of the form <ref name='foo' />
	if (!ref.content) {
		ref.content = content;
	}
};

/**
 * Fill a references list node with the refs collected for its group.
 *
 * Sets the node's data-mw (unless already present), removes its existing
 * children, renders one line per collected ref and finally clears the
 * group so that later <references> tags start afresh.
 *
 * @param {Node} refsNode The 'mw:Extension/references' list node.
 */
References.prototype.insertReferencesIntoDOM = function(refsNode) {
	var about = refsNode.getAttribute('about'),
		group = refsNode.data.parsoid.group || '',
		src = refsNode.data.parsoid.src || '<references/>', // fall back so we don't crash
		// Extract ext-source for <references>..</references> usage
		body = Util.extractExtBody("references", src).trim(),
		refGroup = getRefGroup(this.refGroups, group);

	var dataMW = refsNode.getAttribute('data-mw');
	if (!dataMW) {
		var datamwBody;
		// We'll have to output data-mw.body.extsrc in
		// scenarios where original wikitext was of the form:
		// "<references> lot of refs here </references>"
		// Ex: See [[en:Barack Obama]]
		if (body.length > 0) {
			datamwBody = {
				'extsrc': body,
				'html': (this.nestedRefsHTMLMap[about] || []).join('')
			};
		}

		dataMW = JSON.stringify({
			'name': 'references',
			'body': datamwBody,
			'attrs': {
				// Don't emit empty keys
				'group': group || undefined
			}
		});
	}

	refsNode.setAttribute('data-mw', dataMW);

	// Remove all children from the references node
	while (refsNode.firstChild) {
		refsNode.removeChild(refsNode.firstChild);
	}

	if (refGroup) {
		refGroup.refs.forEach(function(ref) {
			refGroup.renderLine(refsNode, ref);
		});
	}

	// Clear just this group; refs of other groups wait for their own
	// <references> tag.
	this.reset(group);
};
|
|
|
|
/**
 * Native Parsoid implementation of the Cite extension
 * that ties together <ref> and <references>
 *
 * @class
 * @constructor
 */
var Cite = function() {
	// Both handlers get a back-reference to this Cite instance
	this.ref = new Ref(this);
	this.references = new References(this);
};

/**
 * Reset the state of both the <ref> and the <references> handler.
 */
Cite.prototype.resetState = function() {
	this.ref.reset();
	this.references.reset();
};
|
|
|
|
// Export Cite when a CommonJS-style `module` object is available.
if (typeof module === "object") {
	module.exports.Cite = Cite;
}
|