mediawiki-extensions-Cite/lib/ext/Cite/References.js
C. Scott Ananian b84b71af22 Gallery: shift TSRs in the DOM, rather than fibbing about srcOffset
Passing srcOffsets which don't actually correspond to actual regions of
the source wikitext causes problems in the token offset conversion code.
Instead, parse the wikitext as itself, then adjust the TSRs in the DOM
tree.

Since Gallery isn't ported to PHP (yet), update the
automatically-generated Gallery/index.php.  The newly-added
ContentUtils::shiftDSR() was ported, however.

Change-Id: I28f3d3398930733ae2bcf9759e49c45f93bc7190
2019-06-28 14:10:16 +00:00

444 lines
15 KiB
JavaScript

'use strict';
const ParsoidExtApi = module.parent.parent.require('./extapi.js').versionCheck('^0.10.0');
const { ContentUtils, DOMDataUtils, DOMUtils, TokenUtils, WTUtils, Promise } = ParsoidExtApi;
/**
 * Static helpers implementing the `<references>` extension tag: building the
 * list element, turning sealed `<ref>` fragments into link-backs, and filling
 * references lists with the collected refs.
 *
 * @class
 */
class References {
	/**
	 * Report whether any element underneath `node` is a sealed fragment of
	 * type `ref`. `node` itself is not tested, only its descendants.
	 *
	 * @param {Node} node
	 * @return {boolean}
	 */
	static hasRef(node) {
		for (let kid = node.firstChild; kid; kid = kid.nextSibling) {
			if (!DOMUtils.isElt(kid)) {
				continue;
			}
			if (WTUtils.isSealedFragmentOfType(kid, 'ref') || References.hasRef(kid)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Parse the body of a `<references>` tag and wrap the result in a
	 * references list.
	 *
	 * @param {Object} state Parser state; `parseContext`, `env` and `extToken` are read.
	 * @param {string} content Source of the tag body.
	 * @param {Array} args Tag attributes, converted with `TokenUtils.kvToHash`.
	 * @return {Promise} Resolves to the parsed document with the list appended to its body.
	 */
	static toDOM(state, content, args) {
		const parseOpts = {
			wrapperTag: 'div',
			pipelineOpts: {
				extTag: 'references',
				inTemplate: state.parseContext.inTemplate,
			},
		};
		return ParsoidExtApi.parseTokenContentsToDOM(state, args, '', content, parseOpts).then((doc) => {
			const refsOpts = Object.assign(
				{ group: null, responsive: null },
				TokenUtils.kvToHash(args, true)
			);
			const recordTagInfo = (dp) => {
				const hasSource = state.extToken.hasAttribute('source');
				dp.src = hasSource ? state.extToken.getAttribute('source') : null;
				// Redundant - also present on doc.body.firstChild, but that
				// is cumbersome to get at.
				dp.selfClose = state.extToken.dataAttribs.selfClose;
			};
			const frag = References.createReferences(state.env, doc, doc.body, refsOpts, recordTagInfo);
			doc.body.appendChild(frag);
			return doc;
		});
	}

	/**
	 * Build the `<ol>` for a references list, optionally wrapped in a
	 * responsive `<div>`.
	 *
	 * @param {Object} env Supplies `conf.wiki.responsiveReferences` and `newAboutId()`.
	 * @param {Document} doc Document used to create the elements.
	 * @param {Node|null} body When truthy, its children are migrated into the list.
	 * @param {Object} refsOpts `group` and `responsive` options of the tag.
	 * @param {Function} [modifyDp] Called with the data-parsoid of the returned node.
	 * @param {boolean} [autoGenerated] Adds the extension `typeof` and a new `about` id.
	 * @return {Element} The wrapper `<div>` when wrapping is on, the `<ol>` otherwise.
	 */
	static createReferences(env, doc, body, refsOpts, modifyDp, autoGenerated) {
		const list = doc.createElement('ol');
		list.classList.add('mw-references');
		list.classList.add('references');
		if (body) {
			DOMUtils.migrateChildren(body, list);
		}

		// Support the `responsive` parameter: an explicit value on the tag
		// overrides the wiki-wide default, and only '0' switches wrapping off.
		const wikiDefault = env.conf.wiki.responsiveReferences.enabled;
		const responsiveWrap = (refsOpts.responsive === null) ?
			wikiDefault : refsOpts.responsive !== '0';

		let frag = list;
		if (responsiveWrap) {
			const wrapper = doc.createElement('div');
			wrapper.classList.add('mw-references-wrap');
			wrapper.appendChild(list);
			frag = wrapper;
		}

		if (autoGenerated) {
			DOMDataUtils.addAttributes(frag, {
				typeof: 'mw:Extension/references',
				about: env.newAboutId(),
			});
		}

		const dp = DOMDataUtils.getDataParsoid(frag);
		// No group for the empty string either
		if (refsOpts.group) {
			dp.group = refsOpts.group;
			list.setAttribute('data-mw-group', refsOpts.group);
		}
		if (typeof modifyDp === 'function') {
			modifyDp(dp);
		}
		return frag;
	}

	/**
	 * Register the ref carried by a sealed-fragment wrapper with `refsData`
	 * and produce its `<sup class="mw-ref">` link-back.
	 *
	 * Outside of a `<references>` tag the link-back replaces `node` in the DOM.
	 * Inside one (`referencesAboutId` given) its serialization is pushed onto
	 * `nestedRefsHTML` instead.
	 *
	 * @param {Element} node Sealed `ref` fragment wrapper.
	 * @param {Object} refsData Ref registry; `env` and `add()` are used.
	 * @param {string} [referencesAboutId] Given only for refs nested in `<references>`.
	 * @param {string} [referencesGroup] Group of the enclosing `<references>`.
	 * @param {string[]} [nestedRefsHTML] Collector for nested link-back HTML.
	 */
	static extractRefFromNode(node, refsData, referencesAboutId, referencesGroup, nestedRefsHTML) {
		const env = refsData.env;
		const doc = node.ownerDocument;
		const nestedInReferences = referencesAboutId !== undefined;

		// This is data-parsoid from the dom fragment node that's gone through
		// dsr computation and template wrapping.
		const nodeDp = DOMDataUtils.getDataParsoid(node);
		const typeOf = node.getAttribute('typeof') || '';
		const isTplWrapper = /\bmw:Transclusion\b/.test(typeOf);
		const nodeType = typeOf.replace(/mw:DOMFragment\/sealed\/ref/, '');
		let content = nodeDp.html;
		const tplDmw = isTplWrapper ? DOMDataUtils.getDataMw(node) : null;

		// This is the <sup> that's the meat of the sealed fragment
		const sup = env.fragmentMap.get(content)[0];

		// Everything that needs loaded data-attributes on `sup` happens
		// between the load and the store below, so they can be stored away
		// again quickly.
		DOMDataUtils.visitAndLoadDataAttribs(sup);
		const supDp = DOMDataUtils.getDataParsoid(sup);
		const refDmw = DOMDataUtils.getDataMw(sup);
		if (!supDp.empty && References.hasRef(sup)) {
			// nested ref-in-ref
			References._processRefs(env, refsData, sup);
		}
		DOMDataUtils.visitAndStoreDataAttribs(sup);

		// Use the about attribute on the wrapper with priority, since it's
		// only added when the wrapper is a template sibling.
		let about = '';
		if (node.hasAttribute('about')) {
			about = node.getAttribute('about');
		} else if (sup.hasAttribute('about')) {
			about = sup.getAttribute('about');
		}

		// FIXME(SSS): Need to clarify semantics here.
		// If both the containing <references> elt as well as the nested <ref>
		// elt has a group attribute, what takes precedence?
		const group = refDmw.attrs.group || referencesGroup || '';
		const refName = refDmw.attrs.name || '';
		const ref = refsData.add(env, group, refName, about, nestedInReferences);

		// Add ref-index linkback
		const linkBack = doc.createElement('sup');

		// FIXME: Lot of useless work for an edge case
		if (supDp.empty) {
			// Discard wrapper if there was no input wikitext
			content = null;
			refDmw.body = supDp.selfClose ? undefined : { html: '' };
		} else if (ref.hasMultiples) {
			// If there are multiple <ref>s with the same name, but different
			// content, the content of the first <ref> shows up in the
			// <references> section. To ensure lossless RT-ing for the later
			// <ref>s, their HTML has to be recorded inline.
			//
			// Use the non-pp version here since attribs have already been
			// stored before being put in the map.
			const html = ContentUtils.toXML(sup, { innerXML: true });
			refDmw.body = (html !== ref.cachedHtml) ?
				{ html: html } :
				{ id: "mw-reference-text-" + ref.target };
		} else {
			refDmw.body = { id: "mw-reference-text-" + ref.target };
		}

		// Link-backs nested in <references> get no id of their own.
		let linkBackId;
		if (!nestedInReferences) {
			linkBackId = ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id;
		}
		DOMDataUtils.addAttributes(linkBack, {
			about: about,
			class: 'mw-ref',
			id: linkBackId,
			rel: 'dc:references',
			typeof: nodeType,
		});
		DOMDataUtils.addTypeOf(linkBack, "mw:Extension/ref");
		DOMDataUtils.setDataParsoid(linkBack, {
			src: nodeDp.src,
			dsr: nodeDp.dsr,
			pi: nodeDp.pi,
		});
		DOMDataUtils.setDataMw(linkBack, isTplWrapper ? tplDmw : refDmw);

		// refLink is the link to the citation
		const refLink = doc.createElement('a');
		DOMDataUtils.addAttributes(refLink, {
			href: env.page.titleURI + '#' + ref.target,
			style: 'counter-reset: mw-Ref ' + ref.groupIndex + ';',
		});
		if (ref.group) {
			refLink.setAttribute('data-mw-group', ref.group);
		}

		// refLink-span which will contain a default rendering of the cite link
		// for browsers that don't support counters
		const refLinkSpan = doc.createElement('span');
		refLinkSpan.setAttribute('class', 'mw-reflink-text');
		const groupPrefix = ref.group ? ref.group + " " : "";
		refLinkSpan.appendChild(doc.createTextNode("[" + groupPrefix + ref.groupIndex + "]"));
		refLink.appendChild(refLinkSpan);
		linkBack.appendChild(refLink);

		if (nestedInReferences) {
			// No need to delete the node now: it is removed in
			// `insertReferencesIntoDOM` when all the children are cleaned out.
			nestedRefsHTML.push(ContentUtils.ppToXML(linkBack), '\n');
		} else {
			node.parentNode.replaceChild(linkBack, node);
		}

		// Keep the first content to compare multiple <ref>s with the same name.
		if (!ref.content) {
			ref.content = content;
			ref.dir = (refDmw.attrs.dir || '').toLowerCase();
		}
	}

	/**
	 * Update the data-mw of a references node and replace its children with
	 * freshly rendered entries for its group; the group is then removed from
	 * `refsData`.
	 *
	 * @param {Element} refsNode The `<ol>` or its responsive wrapper `<div>`.
	 * @param {Object} refsData Ref registry; `env`, `getRefGroup()` and `removeRefGroup()` are used.
	 * @param {string[]} nestedRefsHTML HTML of link-backs found inside the tag body.
	 * @param {boolean} [autoGenerated] Whether the node was inserted implicitly.
	 */
	static insertReferencesIntoDOM(refsNode, refsData, nestedRefsHTML, autoGenerated) {
		const env = refsData.env;
		const isTplWrapper = /\bmw:Transclusion\b/.test(refsNode.getAttribute('typeof') || '');
		const dp = DOMDataUtils.getDataParsoid(refsNode);
		const group = dp.group || '';

		if (!isTplWrapper) {
			let dataMw = DOMDataUtils.getDataMw(refsNode);
			if (!Object.keys(dataMw).length) {
				// FIXME: This can be moved to `insertMissingReferencesIntoDOM`
				console.assert(autoGenerated);
				dataMw = {
					name: 'references',
					attrs: {
						// Don't emit empty keys
						group: group || undefined,
					},
				};
				DOMDataUtils.setDataMw(refsNode, dataMw);
			}

			if (autoGenerated) {
				// Mark this auto-generated so that it can be skipped during
				// html -> wt and so that clients can strip it if necessary.
				dataMw.autoGenerated = true;
			} else {
				// `body` is always assigned, staying `undefined` for a
				// self-closed tag without nested refs.
				let body;
				if (nestedRefsHTML.length > 0) {
					body = { html: '\n' + nestedRefsHTML.join('') };
				} else if (!dp.selfClose) {
					body = { html: '' };
				}
				dataMw.body = body;
			}
			dp.selfClose = undefined;
		}

		const refGroup = refsData.getRefGroup(group);

		// Deal with responsive wrapper: the list proper is its first child.
		let listNode = refsNode;
		if (refsNode.classList.contains('mw-references-wrap')) {
			const rrOpts = env.conf.wiki.responsiveReferences;
			if (refGroup && refGroup.refs.length > rrOpts.threshold) {
				refsNode.classList.add('mw-references-columns');
			}
			listNode = refsNode.firstChild;
		}

		// Remove all children from the references node
		//
		// Ex: When {{Reflist}} is reused from the cache, it comes with
		// a bunch of references as well. Those cached references have to be
		// removed before generating fresh ones.
		while (listNode.firstChild) {
			listNode.removeChild(listNode.firstChild);
		}

		if (refGroup) {
			refGroup.refs.forEach((ref) => {
				refGroup.renderLine(env, listNode, ref);
			});
		}

		// Remove the group from refsData
		refsData.removeRefGroup(group);
	}

	/**
	 * Process `<ref>`s left behind after the DOM is fully processed.
	 * They are handled as if there was an implicit `<references />` tag at
	 * the end of the DOM, one per remaining group.
	 *
	 * @param {Object} refsData Ref registry; `env` and `refGroups` are used.
	 * @param {Node} node Node the generated lists are appended to.
	 */
	static insertMissingReferencesIntoDOM(refsData, node) {
		const env = refsData.env;
		const doc = node.ownerDocument;

		// The new references come out of "nowhere", so to make selser work
		// properly, add a zero-sized DSR pointing to the end of the document.
		const pointAtEndOfSource = (dp) => {
			dp.dsr = [env.page.src.length, env.page.src.length, 0, 0];
		};

		refsData.refGroups.forEach((unusedGroupData, groupName) => {
			const refsOpts = { group: groupName, responsive: null };
			const frag = References.createReferences(env, doc, null, refsOpts, pointAtEndOfSource, true);

			// Add a \n before the <ol> so that when serialized to wikitext,
			// each <references /> tag appears on its own line.
			node.appendChild(doc.createTextNode("\n"));
			node.appendChild(frag);

			References.insertReferencesIntoDOM(frag, refsData, [""], true);
		});
	}

	/**
	 * Linter hook for references nodes; does no linting and moves on to the
	 * next sibling.
	 *
	 * FIXME: Doing nothing is not entirely right for scenarios where the
	 * <ref> tags are defined in a references section that is itself templated.
	 *
	 * {{1x|<references>\n<ref name='x'><b>foo</ref>\n</references>}}
	 *
	 * In this example, the references tag has the right tplInfo and
	 * when the <ref> tag is processed in the body of the article where
	 * it is accessed, there is no relevant template or dsr info available.
	 *
	 * Ignoring for now.
	 *
	 * @param {Node} refs
	 * @param {Object} env Unused.
	 * @param {Object} tplInfo Unused.
	 * @param {Function} domLinter Unused.
	 * @return {Node|null} `refs.nextSibling`.
	 */
	static lintHandler(refs, env, tplInfo, domLinter) {
		return refs.nextSibling;
	}

	/**
	 * Walk the children of `node`, extracting sealed `<ref>` fragments and
	 * filling `<references>` nodes as they are encountered.
	 *
	 * @param {Object} env
	 * @param {Object} refsData
	 * @param {Node} node
	 */
	static _processRefs(env, refsData, node) {
		let upcoming;
		for (let child = node.firstChild; child !== null; child = upcoming) {
			// Captured first: handling `child` may replace it in the DOM.
			upcoming = child.nextSibling;
			if (!DOMUtils.isElt(child)) {
				continue;
			}

			if (WTUtils.isSealedFragmentOfType(child, 'ref')) {
				References.extractRefFromNode(child, refsData);
				continue;
			}

			const childTypeOf = child.getAttribute('typeof') || '';
			if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(childTypeOf)) {
				const referencesId = child.getAttribute("about") || '';
				const referencesGroup = DOMDataUtils.getDataParsoid(child).group;
				const nestedRefsHTML = [];
				References._processRefsInReferences(refsData, child, referencesId, referencesGroup, nestedRefsHTML);
				References.insertReferencesIntoDOM(child, refsData, nestedRefsHTML);
				continue;
			}

			// inline media -- look inside the data-mw attribute
			if (WTUtils.isInlineMedia(child)) {
				/* -----------------------------------------------------------------
				 * FIXME(subbu): This works but feels very special-cased in 2 ways:
				 *
				 * 1. special cased to images vs. any node that might have
				 *    serialized HTML embedded in data-mw
				 * 2. special cased to global cite handling -- the general scenario
				 *    is DOM post-processors that do different things on the
				 *    top-level vs not.
				 *    - Cite needs to process these fragments in the context of the
				 *      top-level page, and has to be done in order of how the nodes
				 *      are encountered.
				 *    - DOM cleanup can be done on embedded fragments without
				 *      any page-level context and in any order.
				 *    - So, some variability here.
				 *
				 * We should be running dom.cleanup.js passes on embedded html
				 * in data-mw and other attributes. Since correctness doesn't
				 * depend on that cleanup, no more special-case code is being
				 * added to dom.cleanup.js.
				 *
				 * Doing this more generically will require creating a DOMProcessor
				 * class and adding state to it.
				 *
				 * See T214994
				 * ----------------------------------------------------------------- */
				const mediaDmw = DOMDataUtils.getDataMw(child);
				const captionHtml = mediaDmw.caption;
				if (captionHtml) {
					// Extract the caption HTML, build the DOM, process refs,
					// serialize to HTML, update the caption HTML.
					const captionDOM = ContentUtils.ppToDOM(env, captionHtml);
					References._processRefs(env, refsData, captionDOM);
					mediaDmw.caption = ContentUtils.ppToXML(captionDOM, { innerXML: true });
				}
			}

			if (child.hasChildNodes()) {
				References._processRefs(env, refsData, child);
			}
		}
	}

	/**
	 * Extract the refs found anywhere inside a `<references>` node.
	 *
	 * This handles wikitext like this:
	 * ```
	 *   <references> <ref>foo</ref> </references>
	 *   <references> <ref>bar</ref> </references>
	 * ```
	 *
	 * @private
	 * @param {Object} refsData
	 * @param {Node} node
	 * @param {string} referencesId `about` id of the enclosing references node.
	 * @param {string} referencesGroup Group of the enclosing references node.
	 * @param {string[]} nestedRefsHTML Collector passed on to `extractRefFromNode`.
	 */
	static _processRefsInReferences(refsData, node, referencesId, referencesGroup, nestedRefsHTML) {
		let upcoming;
		for (let child = node.firstChild; child !== null; child = upcoming) {
			upcoming = child.nextSibling;
			if (!DOMUtils.isElt(child)) {
				continue;
			}
			if (WTUtils.isSealedFragmentOfType(child, 'ref')) {
				References.extractRefFromNode(child, refsData, referencesId, referencesGroup, nestedRefsHTML);
			} else if (child.hasChildNodes()) {
				References._processRefsInReferences(refsData, child, referencesId, referencesGroup, nestedRefsHTML);
			}
		}
	}
}
/**
 * html -> wikitext serialization hooks for `<references>` nodes.
 * `Promise` here is the one destructured from ParsoidExtApi at the top of
 * the file, which is what provides `Promise.async`.
 */
References.serialHandler = {
	/**
	 * Serialize a references node back to wikitext.
	 *
	 * Yields '' for auto-generated nodes in rt-test mode, the start tag alone
	 * when data-mw has no body, and start tag + serialized body HTML +
	 * closing tag when `body.html` is a string. Any other body shape is
	 * logged as an error and dropped.
	 */
	handle: Promise.async(function *(node, state, wrapperUnmodified) {
		const dataMw = DOMDataUtils.getDataMw(node);
		if (dataMw.autoGenerated && state.rtTestMode) {
			// Eliminate auto-inserted <references /> noise in rt-testing
			return '';
		}

		const startTagSrc = yield state.serializer.serializeExtensionStartTag(node, state);
		if (!dataMw.body) {
			// We self-closed this already.
			return startTagSrc;
		}
		if (typeof dataMw.body.html !== 'string') {
			state.env.log('error',
				'References body unavailable for: ' + node.outerHTML);
			// Drop it!
			return '';
		}

		const serializeOpts = {
			env: state.env,
			extName: dataMw.name,
		};
		const bodySrc = yield state.serializer.serializeHTML(serializeOpts, dataMw.body.html);
		return startTagSrc + bodySrc + '</' + dataMw.name + '>';
	}),

	// FIXME: LEAKY -- Should we expose newline constraints to extensions?
	/**
	 * Newline constraints ahead of the node: new references tags are
	 * serialized on a new line, anything else gets no constraint (`null`).
	 */
	before: function(node, otherNode, state) {
		return WTUtils.isNewElt(node) ? { min: 1, max: 2 } : null;
	},
};

module.exports = References;