/** * This module implements `` and `` extension tag handling * natively in Parsoid. * @module ext/Cite */ 'use strict'; const domino = require('domino'); const ParsoidExtApi = module.parent.require('./extapi.js').versionCheck('^0.10.0'); const { ContentUtils, DOMDataUtils, DOMUtils, TokenUtils, WTUtils, Promise, Sanitizer, } = ParsoidExtApi; /** * Simple token transform version of the Ref extension tag. * * @class */ function Ref(cite) { this.cite = cite; } function hasRef(node) { var c = node.firstChild; while (c) { if (DOMUtils.isElt(c)) { if (WTUtils.isSealedFragmentOfType(c, 'ref')) { return true; } if (hasRef(c)) { return true; } } c = c.nextSibling; } return false; } Ref.prototype.toDOM = function(state, content, args) { // Drop nested refs entirely, unless we've explicitly allowed them if (state.parseContext.extTag === 'ref' && !(state.parseContext.extTagOpts && state.parseContext.extTagOpts.allowNestedRef) ) { return null; } // The one supported case for nested refs is from the {{#tag:ref}} parser // function. However, we're overly permissive here since we can't // distinguish when that's nested in another template. // The php preprocessor did our expansion. const allowNestedRef = state.parseContext.inTemplate && state.parseContext.extTag !== 'ref'; return ParsoidExtApi.parseTokenContentsToDOM(state, args, '', content, { // NOTE: sup's content model requires it only contain phrasing // content, not flow content. However, since we are building an // in-memory DOM which is simply a tree data structure, we can // nest flow content in a tag. wrapperTag: 'sup', inTemplate: state.parseContext.inTemplate, extTag: 'ref', extTagOpts: { allowNestedRef: !!allowNestedRef, }, // FIXME: One-off PHP parser state leak. // This needs a better solution. inPHPBlock: true, }); }; Ref.prototype.serialHandler = { handle: Promise.async(function *(node, state, wrapperUnmodified) { var startTagSrc = yield state.serializer.serializeExtensionStartTag(node, state); var dataMw = DOMDataUtils.getDataMw(node); var env = state.env; var html; if (!dataMw.body) { return startTagSrc; // We self-closed this already. } else if (typeof dataMw.body.html === 'string') { // First look for the extension's content in data-mw.body.html html = dataMw.body.html; } else if (typeof dataMw.body.id === 'string') { // If the body isn't contained in data-mw.body.html, look if // there's an element pointed to by body.id. var bodyElt = node.ownerDocument.getElementById(dataMw.body.id); if (!bodyElt && env.page.editedDoc) { // Try to get to it from the main page. // This can happen when the is inside another // extension, most commonly inside a . // The recursive call to serializeDOM puts us inside // inside a new document. bodyElt = env.page.editedDoc.getElementById(dataMw.body.id); } if (bodyElt) { // n.b. this is going to drop any diff markers but since // the dom differ doesn't traverse into extension content // none should exist anyways. DOMDataUtils.visitAndStoreDataAttribs(bodyElt); html = ContentUtils.toXML(bodyElt, { innerXML: true }); DOMDataUtils.visitAndLoadDataAttribs(bodyElt); } else { // Some extra debugging for VisualEditor var extraDebug = ''; var firstA = node.querySelector('a[href]'); if (firstA && /^#/.test(firstA.getAttribute('href'))) { var href = firstA.getAttribute('href'); try { var ref = node.ownerDocument.querySelector(href); if (ref) { extraDebug += ' [own doc: ' + ref.outerHTML + ']'; } ref = env.page.editedDoc.querySelector(href); if (ref) { extraDebug += ' [main doc: ' + ref.outerHTML + ']'; } } catch (e) { } // eslint-disable-line if (!extraDebug) { extraDebug = ' [reference ' + href + ' not found]'; } } env.log('error/' + dataMw.name, 'extension src id ' + dataMw.body.id + ' points to non-existent element for:', node.outerHTML, '. More debug info: ', extraDebug); return ''; // Drop it! } } else { env.log('error', 'Ref body unavailable for: ' + node.outerHTML); return ''; // Drop it! } var src = yield state.serializer.serializeHTML({ env: state.env, extName: dataMw.name, // FIXME: One-off PHP parser state leak. // This needs a better solution. inPHPBlock: true, }, html); return startTagSrc + src + ''; }), }; Ref.prototype.lintHandler = function(ref, env, tplInfo, domLinter) { // Don't lint the content of ref in ref, since it can lead to cycles // using named refs if (WTUtils.fromExtensionContent(ref, 'references')) { return ref.nextNode; } var linkBackId = ref.firstChild.getAttribute('href').replace(/[^#]*#/, ''); var refNode = ref.ownerDocument.getElementById(linkBackId); if (refNode) { // Ex: Buggy input wikitext without ref content domLinter(refNode.lastChild, env, tplInfo.isTemplated ? tplInfo : null); } return ref.nextNode; }; /** * Helper class used by `` implementation. * @class */ function RefGroup(group) { this.name = group || ''; this.refs = []; this.indexByName = new Map(); } function makeValidIdAttr(val) { // Looks like Cite.php doesn't try to fix ids that already have // a "_" in them. Ex: name="a b" and name="a_b" are considered // identical. Not sure if this is a feature or a bug. // It also considers entities equal to their encoding // (i.e. '&' === '&'), which is done: // in PHP: Sanitizer#decodeTagAttributes and // in Parsoid: ExtensionHandler#normalizeExtOptions return Sanitizer.escapeIdForAttribute(val); } RefGroup.prototype.renderLine = function(env, refsList, ref) { var ownerDoc = refsList.ownerDocument; // Generate the li and set ref content first, so the HTML gets parsed. // We then append the rest of the ref nodes before the first node var li = ownerDoc.createElement('li'); DOMDataUtils.addAttributes(li, { 'about': "#" + ref.target, 'id': ref.target, 'class': ['rtl', 'ltr'].includes(ref.dir) ? 'mw-cite-dir-' + ref.dir : undefined, }); var reftextSpan = ownerDoc.createElement('span'); DOMDataUtils.addAttributes(reftextSpan, { 'id': "mw-reference-text-" + ref.target, 'class': "mw-reference-text", }); if (ref.content) { var content = env.fragmentMap.get(ref.content)[0]; DOMUtils.migrateChildrenBetweenDocs(content, reftextSpan); DOMDataUtils.visitAndLoadDataAttribs(reftextSpan); } li.appendChild(reftextSpan); // Generate leading linkbacks var createLinkback = function(href, group, text) { var a = ownerDoc.createElement('a'); var s = ownerDoc.createElement('span'); var textNode = ownerDoc.createTextNode(text + " "); a.setAttribute('href', env.page.titleURI + '#' + href); s.setAttribute('class', 'mw-linkback-text'); if (group) { a.setAttribute('data-mw-group', group); } s.appendChild(textNode); a.appendChild(s); return a; }; if (ref.linkbacks.length === 1) { var linkback = createLinkback(ref.id, ref.group, '↑'); linkback.setAttribute('rel', 'mw:referencedBy'); li.insertBefore(linkback, reftextSpan); } else { // 'mw:referencedBy' span wrapper var span = ownerDoc.createElement('span'); span.setAttribute('rel', 'mw:referencedBy'); li.insertBefore(span, reftextSpan); ref.linkbacks.forEach(function(lb, i) { span.appendChild(createLinkback(lb, ref.group, i + 1)); }); } // Space before content node li.insertBefore(ownerDoc.createTextNode(' '), reftextSpan); // Add it to the ref list refsList.appendChild(li); }; /** * @class */ function ReferencesData(env) { this.index = 0; this.env = env; this.refGroups = new Map(); } ReferencesData.prototype.getRefGroup = function(groupName, allocIfMissing) { groupName = groupName || ''; if (!this.refGroups.has(groupName) && allocIfMissing) { this.refGroups.set(groupName, new RefGroup(groupName)); } return this.refGroups.get(groupName); }; ReferencesData.prototype.removeRefGroup = function(groupName) { if (groupName !== null && groupName !== undefined) { // '' is a valid group (the default group) this.refGroups.delete(groupName); } }; ReferencesData.prototype.add = function(env, groupName, refName, about, skipLinkback) { var group = this.getRefGroup(groupName, true); refName = makeValidIdAttr(refName); var ref; if (refName && group.indexByName.has(refName)) { ref = group.indexByName.get(refName); if (ref.content && !ref.hasMultiples) { ref.hasMultiples = true; // Use the non-pp version here since we've already stored attribs // before putting them in the map. ref.cachedHtml = ContentUtils.toXML(env.fragmentMap.get(ref.content)[0], { innerXML: true }); } } else { // The ids produced Cite.php have some particulars: // Simple refs get 'cite_ref-' + index // Refs with names get 'cite_ref-' + name + '_' + index + (backlink num || 0) // Notes (references) whose ref doesn't have a name are 'cite_note-' + index // Notes whose ref has a name are 'cite_note-' + name + '-' + index var n = this.index; var refKey = (1 + n) + ''; var refIdBase = 'cite_ref-' + (refName ? refName + '_' + refKey : refKey); var noteId = 'cite_note-' + (refName ? refName + '-' + refKey : refKey); // bump index this.index += 1; ref = { about: about, content: null, dir: '', group: group.name, groupIndex: group.refs.length + 1, index: n, key: refIdBase, id: (refName ? refIdBase + '-0' : refIdBase), linkbacks: [], name: refName, target: noteId, hasMultiples: false, // Just used for comparison when we have multiples cachedHtml: '', }; group.refs.push(ref); if (refName) { group.indexByName.set(refName, ref); } } if (!skipLinkback) { ref.linkbacks.push(ref.key + '-' + ref.linkbacks.length); } return ref; }; /** * @class */ function References(cite) { this.cite = cite; } var dummyDoc = domino.createDocument(); var createReferences = function(env, body, refsOpts, modifyDp, autoGenerated) { var doc = body ? body.ownerDocument : dummyDoc; var ol = doc.createElement('ol'); ol.classList.add('mw-references'); ol.classList.add('references'); if (body) { DOMUtils.migrateChildren(body, ol); } // Support the `responsive` parameter var rrOpts = env.conf.wiki.responsiveReferences; var responsiveWrap = rrOpts.enabled; if (refsOpts.responsive !== null) { responsiveWrap = refsOpts.responsive !== '0'; } var frag; if (responsiveWrap) { var div = doc.createElement('div'); div.classList.add('mw-references-wrap'); div.appendChild(ol); frag = div; } else { frag = ol; } if (autoGenerated) { DOMDataUtils.addAttributes(frag, { typeof: 'mw:Extension/references', about: env.newAboutId(), }); } var dp = DOMDataUtils.getDataParsoid(frag); if (refsOpts.group) { // No group for the empty string either dp.group = refsOpts.group; ol.setAttribute('data-mw-group', refsOpts.group); } if (typeof modifyDp === 'function') { modifyDp(dp); } return frag; }; References.prototype.toDOM = function(state, content, args) { return ParsoidExtApi.parseTokenContentsToDOM(state, args, '', content, { wrapperTag: 'div', extTag: 'references', inTemplate: state.parseContext.inTemplate, }).then(function(doc) { var refsOpts = Object.assign({ group: null, responsive: null, }, TokenUtils.kvToHash(args, true)); var frag = createReferences(state.env, doc.body, refsOpts, function(dp) { dp.src = state.extToken.getAttribute('source'); // Redundant - also present on doc.body.firstChild, but feels cumbersome to use dp.selfClose = state.extToken.dataAttribs.selfClose; }); doc.body.appendChild(frag); return doc; }); }; var _processRefs; References.prototype.extractRefFromNode = function(node, refsData, cite, referencesAboutId, referencesGroup, nestedRefsHTML) { var env = refsData.env; var doc = node.ownerDocument; var nestedInReferences = referencesAboutId !== undefined; // This is data-parsoid from the dom fragment node that's gone through // dsr computation and template wrapping. var nodeDp = DOMDataUtils.getDataParsoid(node); var typeOf = node.getAttribute('typeof'); var isTplWrapper = /\bmw:Transclusion\b/.test(typeOf); var nodeType = (typeOf || '').replace(/mw:DOMFragment\/sealed\/ref/, ''); var content = nodeDp.html; var tplDmw = isTplWrapper ? DOMDataUtils.getDataMw(node) : null; // This is the that's the meat of the sealed fragment var c = env.fragmentMap.get(content)[0]; // All the actions that require loaded data-attributes on `c` are done // here so that we can quickly store those away for later. DOMDataUtils.visitAndLoadDataAttribs(c); var cDp = DOMDataUtils.getDataParsoid(c); var refDmw = DOMDataUtils.getDataMw(c); if (!cDp.empty && hasRef(c)) { // nested ref-in-ref _processRefs(cite, refsData, c); } DOMDataUtils.visitAndStoreDataAttribs(c); // Use the about attribute on the wrapper with priority, since it's // only added when the wrapper is a template sibling. const about = node.getAttribute('about') || c.getAttribute('about'); // FIXME(SSS): Need to clarify semantics here. // If both the containing elt as well as the nested // elt has a group attribute, what takes precedence? var group = refDmw.attrs.group || referencesGroup || ''; var refName = refDmw.attrs.name || ''; var ref = refsData.add(env, group, refName, about, nestedInReferences); // Add ref-index linkback var linkBack = doc.createElement('sup'); // FIXME: Lot of useless work for an edge case if (cDp.empty) { // Discard wrapper if there was no input wikitext content = null; if (cDp.selfClose) { refDmw.body = undefined; } else { refDmw.body = { 'html': '' }; } } else { // If there are multiple s with the same name, but different content, // the content of the first shows up in the section. // in order to ensure lossless RT-ing for later , we have to record // HTML inline for all of them. var html = ''; var contentDiffers = false; if (ref.hasMultiples) { // Use the non-pp version here since we've already stored attribs // before putting them in the map. html = ContentUtils.toXML(c, { innerXML: true }); contentDiffers = html !== ref.cachedHtml; } if (contentDiffers) { refDmw.body = { 'html': html }; } else { refDmw.body = { 'id': "mw-reference-text-" + ref.target }; } } DOMDataUtils.addAttributes(linkBack, { 'about': about, 'class': 'mw-ref', 'id': nestedInReferences ? undefined : (ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id), 'rel': 'dc:references', 'typeof': nodeType, }); DOMDataUtils.addTypeOf(linkBack, "mw:Extension/ref"); var dataParsoid = { src: nodeDp.src, dsr: nodeDp.dsr, pi: nodeDp.pi, }; DOMDataUtils.setDataParsoid(linkBack, dataParsoid); if (isTplWrapper) { DOMDataUtils.setDataMw(linkBack, tplDmw); } else { DOMDataUtils.setDataMw(linkBack, refDmw); } // refLink is the link to the citation var refLink = doc.createElement('a'); DOMDataUtils.addAttributes(refLink, { 'href': env.page.titleURI + '#' + ref.target, 'style': 'counter-reset: mw-Ref ' + ref.groupIndex + ';', }); if (ref.group) { refLink.setAttribute('data-mw-group', ref.group); } // refLink-span which will contain a default rendering of the cite link // for browsers that don't support counters var refLinkSpan = doc.createElement('span'); refLinkSpan.setAttribute('class', 'mw-reflink-text'); refLinkSpan.appendChild(doc.createTextNode("[" + (ref.group ? ref.group + " " : "") + ref.groupIndex + "]")); refLink.appendChild(refLinkSpan); linkBack.appendChild(refLink); if (!nestedInReferences) { node.parentNode.replaceChild(linkBack, node); } else { // We don't need to delete the node now since it'll be removed in // `insertReferencesIntoDOM` when all the children all cleaned out. nestedRefsHTML.push(ContentUtils.ppToXML(linkBack), '\n'); } // Keep the first content to compare multiple s with the same name. if (!ref.content) { ref.content = content; ref.dir = (refDmw.attrs.dir || '').toLowerCase(); } }; References.prototype.insertReferencesIntoDOM = function(refsNode, refsData, nestedRefsHTML, autoGenerated) { var env = refsData.env; var isTplWrapper = /\bmw:Transclusion\b/.test(refsNode.getAttribute('typeof')); var dp = DOMDataUtils.getDataParsoid(refsNode); var group = dp.group || ''; if (!isTplWrapper) { var dataMw = DOMDataUtils.getDataMw(refsNode); if (!Object.keys(dataMw).length) { // FIXME: This can be moved to `insertMissingReferencesIntoDOM` console.assert(autoGenerated); dataMw = { 'name': 'references', 'attrs': { 'group': group || undefined, // Dont emit empty keys }, }; DOMDataUtils.setDataMw(refsNode, dataMw); } // Mark this auto-generated so that we can skip this during // html -> wt and so that clients can strip it if necessary. if (autoGenerated) { dataMw.autoGenerated = true; } else if (nestedRefsHTML.length > 0) { dataMw.body = { 'html': '\n' + nestedRefsHTML.join('') }; } else if (!dp.selfClose) { dataMw.body = { 'html': '' }; } else { dataMw.body = undefined; } dp.selfClose = undefined; } var refGroup = refsData.getRefGroup(group); // Deal with responsive wrapper if (refsNode.classList.contains('mw-references-wrap')) { var rrOpts = env.conf.wiki.responsiveReferences; if (refGroup && refGroup.refs.length > rrOpts.threshold) { refsNode.classList.add('mw-references-columns'); } refsNode = refsNode.firstChild; } // Remove all children from the references node // // Ex: When {{Reflist}} is reused from the cache, it comes with // a bunch of references as well. We have to remove all those cached // references before generating fresh references. while (refsNode.firstChild) { refsNode.removeChild(refsNode.firstChild); } if (refGroup) { refGroup.refs.forEach(ref => refGroup.renderLine(env, refsNode, ref)); } // Remove the group from refsData refsData.removeRefGroup(group); }; /** * Process ``s left behind after the DOM is fully processed. * We process them as if there was an implicit `` tag at * the end of the DOM. */ References.prototype.insertMissingReferencesIntoDOM = function(refsData, node) { var env = refsData.env; var doc = node.ownerDocument; refsData.refGroups.forEach((refsValue, refsGroup) => { var frag = createReferences(env, null, { group: refsGroup, responsive: null, }, function(dp) { // The new references come out of "nowhere", so to make selser work // propertly, add a zero-sized DSR pointing to the end of the document. dp.dsr = [env.page.src.length, env.page.src.length, 0, 0]; }, true); // Add a \n before the
    so that when serialized to wikitext, // each tag appears on its own line. node.appendChild(doc.createTextNode("\n")); node.appendChild(frag); this.insertReferencesIntoDOM(frag, refsData, [""], true); }); }; References.prototype.serialHandler = { handle: Promise.async(function *(node, state, wrapperUnmodified) { var dataMw = DOMDataUtils.getDataMw(node); if (dataMw.autoGenerated && state.rtTestMode) { // Eliminate auto-inserted noise in rt-testing return ''; } else { var startTagSrc = yield state.serializer.serializeExtensionStartTag(node, state); if (!dataMw.body) { return startTagSrc; // We self-closed this already. } else if (typeof dataMw.body.html === 'string') { var src = yield state.serializer.serializeHTML({ env: state.env, extName: dataMw.name, }, dataMw.body.html); return startTagSrc + src + ''; } else { state.env.log('error', 'References body unavailable for: ' + node.outerHTML); return ''; // Drop it! } } }), // FIXME: LEAKY -- Should we expose newline constraints to extensions? before: function(node, otherNode, state) { // Serialize new references tags on a new line. if (WTUtils.isNewElt(node)) { return { min: 1, max: 2 }; } else { return null; } }, }; References.prototype.lintHandler = function(refs, env, tplInfo, domLinter) { // Nothing to do // // FIXME: Not entirely true for scenarios where the tags // are defined in the references section that is itself templated. // // {{1x|\nfoo\n}} // // In this example, the references tag has the right tplInfo and // when the tag is processed in the body of the article where // it is accessed, there is no relevant template or dsr info available. // // Ignoring for now. return refs.nextNode; }; /** * This handles wikitext like this: * ``` * foo * bar * ``` * @private */ var _processRefsInReferences = function(cite, refsData, node, referencesId, referencesGroup, nestedRefsHTML) { var child = node.firstChild; while (child !== null) { var nextChild = child.nextSibling; if (DOMUtils.isElt(child)) { if (WTUtils.isSealedFragmentOfType(child, 'ref')) { cite.references.extractRefFromNode(child, refsData, cite, referencesId, referencesGroup, nestedRefsHTML); } else if (child.hasChildNodes()) { _processRefsInReferences(cite, refsData, child, referencesId, referencesGroup, nestedRefsHTML); } } child = nextChild; } }; _processRefs = function(cite, refsData, node) { var child = node.firstChild; while (child !== null) { var nextChild = child.nextSibling; if (DOMUtils.isElt(child)) { if (WTUtils.isSealedFragmentOfType(child, 'ref')) { cite.references.extractRefFromNode(child, refsData, cite); } else if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(child.getAttribute('typeOf'))) { var referencesId = child.getAttribute("about"); var referencesGroup = DOMDataUtils.getDataParsoid(child).group; var nestedRefsHTML = []; _processRefsInReferences(cite, refsData, child, referencesId, referencesGroup, nestedRefsHTML); cite.references.insertReferencesIntoDOM(child, refsData, nestedRefsHTML); } else { // inline media -- look inside the data-mw attribute if (WTUtils.isInlineMedia(child)) { /* ----------------------------------------------------------------- * FIXME(subbu): This works but feels very special-cased in 2 ways: * * 1. special cased to images vs. any node that might have * serialized HTML embedded in data-mw * 2. special cased to global cite handling -- the general scenario * is DOM post-processors that do different things on the * top-level vs not. * - Cite needs to process these fragments in the context of the * top-level page, and has to be done in order of how the nodes * are encountered. * - DOM cleanup can be done on embedded fragments without * any page-level context and in any order. * - So, some variability here. * * We should be running dom.cleanup.js passes on embedded html * in data-mw and other attributes. Since correctness doesn't * depend on that cleanup, I am not adding more special-case * code in dom.cleanup.js. * * Doing this more generically will require creating a DOMProcessor * class and adding state to it. * * See T214994 * ----------------------------------------------------------------- */ var dmw = DOMDataUtils.getDataMw(child); var caption = dmw.caption; if (caption) { // Extract the caption HTML, build the DOM, process refs, // serialize to HTML, update the caption HTML. var captionDOM = ContentUtils.ppToDOM(caption); _processRefs(cite, refsData, captionDOM); dmw.caption = ContentUtils.ppToXML(captionDOM, { innerXML: true }); } } if (child.hasChildNodes()) { _processRefs(cite, refsData, child); } } } child = nextChild; } }; /** * Native Parsoid implementation of the Cite extension * that ties together `` and ``. */ var Cite = function() { this.ref = new Ref(this); this.references = new References(this); this.config = { name: 'cite', domProcessors: { wt2htmlPostProcessor: (...args) => this._wt2htmlPostProcessor(...args), html2wtPreProcessor: (...args) => this._html2wtPreProcessor(...args), }, tags: [ { name: 'ref', toDOM: (...args) => this.ref.toDOM(...args), fragmentOptions: { unwrapFragment: false, }, serialHandler: this.ref.serialHandler, // FIXME: Rename to toWikitext lintHandler: this.ref.lintHandler, // FIXME: Do we need (a) domDiffHandler (b) ... others ... }, { name: 'references', toDOM: (...args) => this.references.toDOM(...args), serialHandler: this.references.serialHandler, lintHandler: this.references.lintHandler, }, ], styles: [ 'ext.cite.style', 'ext.cite.styles', ], }; }; /** * wt -> html DOM PostProcessor */ Cite.prototype._wt2htmlPostProcessor = function(body, env, options, atTopLevel) { if (atTopLevel) { var refsData = new ReferencesData(env); _processRefs(this, refsData, body); this.references.insertMissingReferencesIntoDOM(refsData, body); } }; /** * html -> wt DOM PreProcessor * * This is to reconstitute page-level information from local annotations * left behind by editing clients. * * Editing clients add inserted: true or deleted: true properties to a 's * data-mw object. These are no-ops for non-named s. For named s, * - for inserted refs, we might want to de-duplicate refs. * - for deleted refs, if the primary ref was deleted, we have to transfer * the primary ref designation to another instance of the named ref. */ Cite.prototype._html2wtPreProcessor = function(env, body) { // TODO }; if (typeof module === "object") { module.exports = Cite; }