mediawiki-extensions-Cite/lib/ext/Cite/index.js

814 lines
25 KiB
JavaScript
Raw Normal View History

/**
* This module implements `<ref>` and `<references>` extension tag handling
* natively in Parsoid.
* @module ext/Cite
*/
'use strict';
const ParsoidExtApi = module.parent.require('./extapi.js').versionCheck('^0.10.0');
const {
ContentUtils,
DOMDataUtils,
DOMUtils,
TokenUtils,
WTUtils,
Promise,
Sanitizer,
} = ParsoidExtApi;
/**
 * Handler object for the `<ref>` extension tag (simple token-transform
 * flavour).
 *
 * @class
 * @param {Object} cite The owning extension object; stored on the instance
 *   unchanged as `this.cite`.
 */
function Ref(cite) {
	Object.assign(this, { cite: cite });
}
/**
 * Depth-first search for a sealed `ref` fragment among the element
 * descendants of `node`.
 *
 * Non-element children are skipped. An element that is itself a sealed
 * `ref` fragment ends the search immediately; any other element is
 * searched recursively.
 *
 * @param {Node} node Root of the subtree to inspect (the root itself is
 *   not tested, only its descendants).
 * @return {boolean} True as soon as one sealed `ref` fragment is found.
 */
function hasRef(node) {
	for (let kid = node.firstChild; kid; kid = kid.nextSibling) {
		if (!DOMUtils.isElt(kid)) {
			continue;
		}
		if (WTUtils.isSealedFragmentOfType(kid, 'ref') || hasRef(kid)) {
			return true;
		}
	}
	return false;
}
/**
 * Parses the contents of a `<ref>` tag into a DOM via
 * `ParsoidExtApi.parseTokenContentsToDOM`.
 *
 * @param {Object} state Parser state; `state.parseContext` is consulted for
 *   `extTag`, `extTagOpts` and `inTemplate`.
 * @param {string} content Source of the tag body, handed to the parser.
 * @param {Array} args Tag arguments, handed to the parser.
 * @return {null|*} `null` when this ref is nested in another ref and nesting
 *   has not been allowed; otherwise whatever `parseTokenContentsToDOM`
 *   returns.
 */
Ref.prototype.toDOM = function(state, content, args) {
	const ctx = state.parseContext;
	const insideRef = ctx.extTag === 'ref';

	// Nested refs are dropped outright unless the enclosing ref opted in.
	if (insideRef && !(ctx.extTagOpts && ctx.extTagOpts.allowNestedRef)) {
		return null;
	}

	// The only supported nesting comes from the {{#tag:ref}} parser function.
	// We are more permissive than that because we cannot tell when it is
	// itself nested inside another template. The php preprocessor did our
	// expansion.
	const allowNestedRef = ctx.inTemplate && !insideRef;

	const parseOpts = {
		// NOTE: <sup> is only supposed to hold phrasing content, not flow
		// content. This is an in-memory DOM (just a tree), though, so flow
		// content can be nested inside it without trouble.
		wrapperTag: 'sup',
		inTemplate: ctx.inTemplate,
		extTag: 'ref',
		extTagOpts: {
			allowNestedRef: Boolean(allowNestedRef),
		},
		// FIXME: One-off PHP parser state leak.
		// This needs a better solution.
		inPHPBlock: true,
	};
	return ParsoidExtApi.parseTokenContentsToDOM(state, args, '', content, parseOpts);
};
/**
 * html -> wt serialization handler for `<ref>` nodes.
 *
 * `handle` resolves to the wikitext for the node: the start tag alone when
 * data-mw has no body, the start tag + serialized body + end tag otherwise,
 * or the empty string (after logging an error) when the body cannot be
 * located.
 */
Ref.prototype.serialHandler = {
	handle: Promise.async(function *(node, state, wrapperUnmodified) {
		const openTag = yield state.serializer.serializeExtensionStartTag(node, state);
		const dmw = DOMDataUtils.getDataMw(node);
		const env = state.env;

		if (!dmw.body) {
			// Nothing to add: the start tag was emitted self-closed.
			return openTag;
		}

		let bodyHtml;
		if (typeof dmw.body.html === 'string') {
			// Preferred source: the body HTML is inlined in data-mw.body.html
			bodyHtml = dmw.body.html;
		} else if (typeof dmw.body.id !== 'string') {
			env.log('error', 'Ref body unavailable for: ' + node.outerHTML);
			return ''; // Drop it!
		} else {
			// Otherwise data-mw.body.id names the element holding the body.
			let bodyElt = node.ownerDocument.getElementById(dmw.body.id);
			if (!bodyElt && env.page.editedDoc) {
				// Fall back to the main page document. This is needed when
				// the <ref> sits inside another extension (most commonly a
				// <references>): the recursive serializeDOM call puts us in
				// a new document.
				bodyElt = env.page.editedDoc.getElementById(dmw.body.id);
			}

			if (!bodyElt) {
				// Collect some extra debugging output for VisualEditor
				let extraDebug = '';
				const firstA = node.querySelector('a[href]');
				if (firstA && /^#/.test(firstA.getAttribute('href'))) {
					const href = firstA.getAttribute('href');
					try {
						let hit = node.ownerDocument.querySelector(href);
						if (hit) {
							extraDebug += ' [own doc: ' + hit.outerHTML + ']';
						}
						hit = env.page.editedDoc.querySelector(href);
						if (hit) {
							extraDebug += ' [main doc: ' + hit.outerHTML + ']';
						}
					} catch (e) { } // eslint-disable-line
					if (!extraDebug) {
						extraDebug = ' [reference ' + href + ' not found]';
					}
				}
				env.log('error/' + dmw.name,
					'extension src id ' + dmw.body.id +
					' points to non-existent element for:', node.outerHTML,
					'. More debug info: ', extraDebug);
				return ''; // Drop it!
			}

			// n.b. diff markers get dropped here, but none should exist
			// anyway since the dom differ does not traverse into extension
			// content.
			DOMDataUtils.visitAndStoreDataAttribs(bodyElt);
			bodyHtml = ContentUtils.toXML(bodyElt, { innerXML: true });
			DOMDataUtils.visitAndLoadDataAttribs(bodyElt);
		}

		const bodySrc = yield state.serializer.serializeHTML({
			env: state.env,
			extName: dmw.name,
			// FIXME: One-off PHP parser state leak.
			// This needs a better solution.
			inPHPBlock: true,
		}, bodyHtml);
		return openTag + bodySrc + '</' + dmw.name + '>';
	}),
};
/**
 * Lint handler for `<ref>` nodes.
 *
 * Unless the ref comes from `references` extension content, the fragment
 * part of the first child's `href` is looked up as an element id in the
 * ref's document and, when found, that element's last child is handed to
 * `domLinter`.
 *
 * @param {Node} ref The ref node being linted.
 * @param {Object} env Passed through to `domLinter`.
 * @param {Object} tplInfo Passed to `domLinter` only when
 *   `tplInfo.isTemplated` is truthy (otherwise `null` is passed).
 * @param {Function} domLinter Linter callback.
 * @return {Node} `ref.nextNode`.
 */
Ref.prototype.lintHandler = function(ref, env, tplInfo, domLinter) {
	// Don't lint the content of ref in ref, since it can lead to cycles
	// using named refs
	const skipContent = WTUtils.fromExtensionContent(ref, 'references');
	if (!skipContent) {
		const href = ref.firstChild.getAttribute('href');
		const targetId = href.replace(/[^#]*#/, '');
		const target = ref.ownerDocument.getElementById(targetId);
		// The target can be missing, e.g. for buggy input wikitext
		// without ref content.
		if (target) {
			domLinter(target.lastChild, env, tplInfo.isTemplated ? tplInfo : null);
		}
	}
	return ref.nextNode;
};
/**
 * Bookkeeping for one reference group; used by the `<references>`
 * implementation.
 *
 * @class
 * @param {string} [group] Group name; any falsy value becomes `''`.
 */
function RefGroup(group) {
	Object.assign(this, {
		// Group name ('' when none was given)
		name: group ? group : '',
		// Refs in the order they were added
		refs: [],
		// Lookup of named refs by name
		indexByName: new Map(),
	});
}
/**
 * Turns a ref name into a value usable in an id attribute by delegating to
 * `Sanitizer.escapeIdForAttribute`.
 *
 * @param {string} val Raw name.
 * @return {string} Whatever `Sanitizer.escapeIdForAttribute` returns.
 */
function makeValidIdAttr(val) {
	// Cite.php apparently leaves ids alone when they already contain a "_",
	// so name="a b" and name="a_b" end up identical. Not sure whether that
	// is a feature or a bug.
	// Entities are also treated as equal to their encoding
	// (i.e. '&' === '&amp;'); that is handled:
	//   in PHP: Sanitizer#decodeTagAttributes and
	//   in Parsoid: ExtensionHandler#normalizeExtOptions
	const escaped = Sanitizer.escapeIdForAttribute(val);
	return escaped;
}
/**
 * Builds the `<li>` for one reference and appends it to `refsList`.
 *
 * The item holds, in order: the linkback(s), a single space, and a
 * `span.mw-reference-text` carrying the reference content.
 *
 * @param {Object} env Provides `fragmentMap` (content lookup) and
 *   `page.titleURI` (href prefix).
 * @param {Element} refsList List element receiving the new item.
 * @param {Object} ref Reference record as created by `ReferencesData#add`.
 */
RefGroup.prototype.renderLine = function(env, refsList, ref) {
	const doc = refsList.ownerDocument;

	// Builds one linkback: <a href><span class="mw-linkback-text">text </span></a>
	const buildLinkback = (href, group, text) => {
		const anchor = doc.createElement('a');
		const label = doc.createElement('span');
		const labelText = doc.createTextNode(text + " ");
		anchor.setAttribute('href', env.page.titleURI + '#' + href);
		label.setAttribute('class', 'mw-linkback-text');
		if (group) {
			anchor.setAttribute('data-mw-group', group);
		}
		label.appendChild(labelText);
		anchor.appendChild(label);
		return anchor;
	};

	// The li and the ref content are set up first so the HTML gets parsed;
	// everything else is then inserted in front of the content span.
	const item = doc.createElement('li');
	const hasKnownDir = ref.dir === 'rtl' || ref.dir === 'ltr';
	DOMDataUtils.addAttributes(item, {
		'about': "#" + ref.target,
		'id': ref.target,
		'class': hasKnownDir ? 'mw-cite-dir-' + ref.dir : undefined,
	});

	const textSpan = doc.createElement('span');
	DOMDataUtils.addAttributes(textSpan, {
		'id': "mw-reference-text-" + ref.target,
		'class': "mw-reference-text",
	});
	if (ref.content) {
		const fragment = env.fragmentMap.get(ref.content)[0];
		DOMUtils.migrateChildrenBetweenDocs(fragment, textSpan);
		DOMDataUtils.visitAndLoadDataAttribs(textSpan);
	}
	item.appendChild(textSpan);

	// Leading linkbacks
	if (ref.linkbacks.length !== 1) {
		// Numbered linkbacks inside a 'mw:referencedBy' span wrapper
		const wrapper = doc.createElement('span');
		wrapper.setAttribute('rel', 'mw:referencedBy');
		item.insertBefore(wrapper, textSpan);
		ref.linkbacks.forEach((lb, idx) => {
			wrapper.appendChild(buildLinkback(lb, ref.group, idx + 1));
		});
	} else {
		// Exactly one use: a lone arrow that carries the rel itself
		const only = buildLinkback(ref.id, ref.group, '↑');
		only.setAttribute('rel', 'mw:referencedBy');
		item.insertBefore(only, textSpan);
	}

	// Space before content node
	item.insertBefore(doc.createTextNode(' '), textSpan);

	// Add it to the ref list
	refsList.appendChild(item);
};
/**
 * Page-level state for reference processing: a running ref counter, the
 * environment, and the ref groups seen so far (keyed by group name).
 *
 * @class
 * @param {Object} env Stored unchanged as `this.env`.
 */
function ReferencesData(env) {
	Object.assign(this, {
		index: 0,
		env: env,
		refGroups: new Map(),
	});
}
/**
 * Looks up a ref group by name, optionally creating it on first use.
 *
 * @param {string} [groupName] Group name; falsy values map to `''`.
 * @param {boolean} [allocIfMissing] When truthy, a missing group is created
 *   and registered before being returned.
 * @return {RefGroup|undefined} The group, or `undefined` when it does not
 *   exist and was not allocated.
 */
ReferencesData.prototype.getRefGroup = function(groupName, allocIfMissing) {
	const key = groupName || '';
	const groups = this.refGroups;
	if (allocIfMissing && !groups.has(key)) {
		groups.set(key, new RefGroup(key));
	}
	return groups.get(key);
};
/**
 * Forgets a ref group.
 *
 * @param {string} groupName Name of the group to drop. `null` and
 *   `undefined` are ignored; `''` is honoured because it names the default
 *   group.
 */
ReferencesData.prototype.removeRefGroup = function(groupName) {
	if (groupName == null) {
		return;
	}
	this.refGroups.delete(groupName);
};
/**
 * Registers one use of a ref and returns its record.
 *
 * A named ref that was already seen in the group is reused; anything else
 * gets a fresh record, which is appended to the group (and indexed by name
 * when it has one).
 *
 * @param {Object} env Provides `fragmentMap`, used to snapshot the first
 *   content of a named ref the first time that name is reused.
 * @param {string} groupName Group the ref belongs to.
 * @param {string} refName Ref name; it is passed through `makeValidIdAttr`
 *   before use.
 * @param {string} about Stored on a newly created record.
 * @param {boolean} [skipLinkback] When truthy, no linkback id is recorded
 *   for this use.
 * @return {Object} The (new or existing) ref record.
 */
ReferencesData.prototype.add = function(env, groupName, refName, about, skipLinkback) {
	const group = this.getRefGroup(groupName, true);
	const name = makeValidIdAttr(refName);
	let ref;

	if (!name || !group.indexByName.has(name)) {
		// Particulars of the ids produced by Cite.php:
		// Simple refs get 'cite_ref-' + index
		// Refs with names get 'cite_ref-' + name + '_' + index + (backlink num || 0)
		// Notes (references) whose ref doesn't have a name are 'cite_note-' + index
		// Notes whose ref has a name are 'cite_note-' + name + '-' + index
		const n = this.index;
		const refKey = (1 + n) + '';
		const refIdBase = 'cite_ref-' + (name ? name + '_' + refKey : refKey);
		const noteId = 'cite_note-' + (name ? name + '-' + refKey : refKey);

		// bump index
		this.index += 1;

		ref = {
			about: about,
			content: null,
			dir: '',
			group: group.name,
			groupIndex: group.refs.length + 1,
			index: n,
			key: refIdBase,
			id: (name ? refIdBase + '-0' : refIdBase),
			linkbacks: [],
			name: name,
			target: noteId,
			hasMultiples: false,
			// Just used for comparison when we have multiples
			cachedHtml: '',
		};
		group.refs.push(ref);
		if (name) {
			group.indexByName.set(name, ref);
		}
	} else {
		ref = group.indexByName.get(name);
		if (ref.content && !ref.hasMultiples) {
			ref.hasMultiples = true;
			// The non-pp serializer is the right one here: attribs were
			// already stored before the fragment went into the map.
			ref.cachedHtml = ContentUtils.toXML(env.fragmentMap.get(ref.content)[0], { innerXML: true });
		}
	}

	if (!skipLinkback) {
		ref.linkbacks.push(ref.key + '-' + ref.linkbacks.length);
	}
	return ref;
};
/**
 * Handler object for the `<references>` extension tag.
 *
 * @class
 * @param {Object} cite The owning extension object; stored on the instance
 *   unchanged as `this.cite`.
 */
function References(cite) {
	Object.assign(this, { cite: cite });
}
/**
 * Creates the `<ol class="mw-references references">` for a references
 * list, optionally wrapped in a responsive `<div class="mw-references-wrap">`.
 *
 * @param {Object} env Provides `conf.wiki.responsiveReferences.enabled`
 *   (the wiki default for wrapping) and `newAboutId()`.
 * @param {Document} doc Document used to create the elements.
 * @param {Node|null} body When given, its children are migrated into the
 *   new `<ol>`.
 * @param {Object} refsOpts Tag options.
 * @param {string|null} [refsOpts.group] Group name; when truthy it is
 *   recorded in data-parsoid and as `data-mw-group` on the `<ol>`.
 * @param {string|null} [refsOpts.responsive] Explicit override of the wiki
 *   default: `'0'` disables the wrapper, any other value enables it. When
 *   `null` or omitted, the wiki default applies.
 * @param {Function} [modifyDp] Called with the fragment's data-parsoid
 *   object so the caller can add to it.
 * @param {boolean} [autoGenerated] When truthy, the fragment is marked as a
 *   `mw:Extension/references` node with a fresh about id.
 * @return {Element} The wrapper `<div>` when wrapping, else the `<ol>`.
 */
var createReferences = function(env, doc, body, refsOpts, modifyDp, autoGenerated) {
	const ol = doc.createElement('ol');
	ol.classList.add('mw-references');
	ol.classList.add('references');

	if (body) {
		DOMUtils.migrateChildren(body, ol);
	}

	// Support the `responsive` parameter.
	// An omitted option (undefined) must behave like an explicit null;
	// otherwise a caller that leaves it out would silently force the
	// wrapper on regardless of the wiki configuration.
	const rrOpts = env.conf.wiki.responsiveReferences;
	let responsiveWrap = rrOpts.enabled;
	if (refsOpts.responsive !== null && refsOpts.responsive !== undefined) {
		responsiveWrap = refsOpts.responsive !== '0';
	}

	let frag;
	if (responsiveWrap) {
		const div = doc.createElement('div');
		div.classList.add('mw-references-wrap');
		div.appendChild(ol);
		frag = div;
	} else {
		frag = ol;
	}

	if (autoGenerated) {
		DOMDataUtils.addAttributes(frag, {
			typeof: 'mw:Extension/references',
			about: env.newAboutId(),
		});
	}

	const dp = DOMDataUtils.getDataParsoid(frag);
	if (refsOpts.group) {  // No group for the empty string either
		dp.group = refsOpts.group;
		ol.setAttribute('data-mw-group', refsOpts.group);
	}
	if (typeof modifyDp === 'function') {
		modifyDp(dp);
	}

	return frag;
};
/**
 * Parses the contents of a `<references>` tag and wraps the result in the
 * references list structure built by `createReferences`.
 *
 * @param {Object} state Parser state; `state.parseContext.inTemplate`,
 *   `state.env` and `state.extToken` are used.
 * @param {string} content Source of the tag body, handed to the parser.
 * @param {Array} args Tag arguments; also converted to a hash to read the
 *   `group` and `responsive` options.
 * @return {Promise} Resolves to the parsed document, whose body then holds
 *   the references fragment.
 */
References.prototype.toDOM = function(state, content, args) {
	const parseOpts = {
		wrapperTag: 'div',
		extTag: 'references',
		inTemplate: state.parseContext.inTemplate,
	};

	const wrapInReferences = (doc) => {
		const defaults = {
			group: null,
			responsive: null,
		};
		const refsOpts = Object.assign(defaults, TokenUtils.kvToHash(args, true));

		const recordSource = (dp) => {
			dp.src = state.extToken.getAttribute('source');
			// Redundant - also present on doc.body.firstChild, but feels cumbersome to use
			dp.selfClose = state.extToken.dataAttribs.selfClose;
		};

		const frag = createReferences(state.env, doc, doc.body, refsOpts, recordSource);
		doc.body.appendChild(frag);
		return doc;
	};

	return ParsoidExtApi
		.parseTokenContentsToDOM(state, args, '', content, parseOpts)
		.then(wrapInReferences);
};
// Forward declaration: the function is assigned further down in this file,
// but `extractRefFromNode` already refers to it (for refs nested in refs).
var _processRefs;
/**
 * Converts one sealed `<ref>` fragment wrapper into its final
 * `<sup class="mw-ref">` linkback and registers the ref in `refsData`.
 *
 * Outside of a `<references>` tag the wrapper node is replaced in the DOM
 * by the linkback. Inside one, the serialized linkback is appended to
 * `nestedRefsHTML` instead and the wrapper is left in place.
 *
 * @param {Element} node The sealed-fragment wrapper node for the ref.
 * @param {ReferencesData} refsData Page-level ref state; also supplies `env`.
 * @param {Object} cite Passed on to `_processRefs` for refs nested in this
 *   ref's content.
 * @param {string} [referencesAboutId] When not `undefined`, the ref is
 *   treated as nested inside a `<references>` tag.
 * @param {string} [referencesGroup] Group used when the ref itself has no
 *   `group` attribute.
 * @param {Array} [nestedRefsHTML] Receives the serialized linkback (plus a
 *   newline) when the ref is nested inside a `<references>` tag.
 */
References.prototype.extractRefFromNode = function(node, refsData, cite,
referencesAboutId, referencesGroup, nestedRefsHTML) {
var env = refsData.env;
var doc = node.ownerDocument;
var nestedInReferences = referencesAboutId !== undefined;
// This is data-parsoid from the dom fragment node that's gone through
// dsr computation and template wrapping.
var nodeDp = DOMDataUtils.getDataParsoid(node);
var typeOf = node.getAttribute('typeof');
var isTplWrapper = /\bmw:Transclusion\b/.test(typeOf);
// The typeof value with the sealed-fragment marker stripped; whatever
// remains is carried over to the linkback.
var nodeType = (typeOf || '').replace(/mw:DOMFragment\/sealed\/ref/, '');
// Key under which the fragment is stored in env.fragmentMap
var content = nodeDp.html;
var tplDmw = isTplWrapper ? DOMDataUtils.getDataMw(node) : null;
// This is the <sup> that's the meat of the sealed fragment
var c = env.fragmentMap.get(content)[0];
// All the actions that require loaded data-attributes on `c` are done
// here so that we can quickly store those away for later.
DOMDataUtils.visitAndLoadDataAttribs(c);
var cDp = DOMDataUtils.getDataParsoid(c);
var refDmw = DOMDataUtils.getDataMw(c);
if (!cDp.empty && hasRef(c)) { // nested ref-in-ref
_processRefs(env, cite, refsData, c);
}
DOMDataUtils.visitAndStoreDataAttribs(c);
// Use the about attribute on the wrapper with priority, since it's
// only added when the wrapper is a template sibling.
const about = node.getAttribute('about') || c.getAttribute('about');
// FIXME(SSS): Need to clarify semantics here.
// If both the containing <references> elt as well as the nested <ref>
// elt has a group attribute, what takes precedence?
var group = refDmw.attrs.group || referencesGroup || '';
var refName = refDmw.attrs.name || '';
// The last argument suppresses the linkback entry for refs that are
// defined inside a <references> tag.
var ref = refsData.add(env, group, refName, about, nestedInReferences);
// Add ref-index linkback
var linkBack = doc.createElement('sup');
// FIXME: Lot of useless work for an edge case
if (cDp.empty) {
// Discard wrapper if there was no input wikitext
content = null;
if (cDp.selfClose) {
refDmw.body = undefined;
} else {
refDmw.body = { 'html': '' };
}
} else {
// If there are multiple <ref>s with the same name, but different content,
// the content of the first <ref> shows up in the <references> section.
// in order to ensure lossless RT-ing for later <refs>, we have to record
// HTML inline for all of them.
var html = '';
var contentDiffers = false;
if (ref.hasMultiples) {
// Use the non-pp version here since we've already stored attribs
// before putting them in the map.
html = ContentUtils.toXML(c, { innerXML: true });
contentDiffers = html !== ref.cachedHtml;
}
if (contentDiffers) {
refDmw.body = { 'html': html };
} else {
refDmw.body = { 'id': "mw-reference-text-" + ref.target };
}
}
// The linkback gets no id when nested in <references>; otherwise named
// refs use their most recent linkback id and unnamed refs use ref.id.
DOMDataUtils.addAttributes(linkBack, {
'about': about,
'class': 'mw-ref',
'id': nestedInReferences ? undefined :
(ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id),
'rel': 'dc:references',
'typeof': nodeType,
});
DOMDataUtils.addTypeOf(linkBack, "mw:Extension/ref");
var dataParsoid = {
src: nodeDp.src,
dsr: nodeDp.dsr,
pi: nodeDp.pi,
};
DOMDataUtils.setDataParsoid(linkBack, dataParsoid);
// A template wrapper keeps the template's data-mw; a plain ref gets the
// ref's own data-mw (with the body set above).
if (isTplWrapper) {
DOMDataUtils.setDataMw(linkBack, tplDmw);
} else {
DOMDataUtils.setDataMw(linkBack, refDmw);
}
// refLink is the link to the citation
var refLink = doc.createElement('a');
DOMDataUtils.addAttributes(refLink, {
'href': env.page.titleURI + '#' + ref.target,
'style': 'counter-reset: mw-Ref ' + ref.groupIndex + ';',
});
if (ref.group) {
refLink.setAttribute('data-mw-group', ref.group);
}
// refLink-span which will contain a default rendering of the cite link
// for browsers that don't support counters
var refLinkSpan = doc.createElement('span');
refLinkSpan.setAttribute('class', 'mw-reflink-text');
refLinkSpan.appendChild(doc.createTextNode("[" +
(ref.group ? ref.group + " " : "") + ref.groupIndex + "]"));
refLink.appendChild(refLinkSpan);
linkBack.appendChild(refLink);
if (!nestedInReferences) {
node.parentNode.replaceChild(linkBack, node);
} else {
// We don't need to delete the node now since it'll be removed in
// `insertReferencesIntoDOM` when all the children all cleaned out.
nestedRefsHTML.push(ContentUtils.ppToXML(linkBack), '\n');
}
// Keep the first content to compare multiple <ref>s with the same name.
if (!ref.content) {
ref.content = content;
ref.dir = (refDmw.attrs.dir || '').toLowerCase();
}
};
/**
 * Fills a references node with one rendered line per ref of its group and
 * finalises the node's data-mw.
 *
 * @param {Element} refsNode The references fragment (either the list itself
 *   or its `mw-references-wrap` wrapper).
 * @param {ReferencesData} refsData Page-level ref state; the rendered group
 *   is removed from it afterwards.
 * @param {Array} nestedRefsHTML Serialized refs that were defined inside the
 *   tag; when non-empty they become the data-mw body.
 * @param {boolean} [autoGenerated] True for lists that were not present in
 *   the source and are being synthesised.
 */
References.prototype.insertReferencesIntoDOM = function(refsNode, refsData, nestedRefsHTML, autoGenerated) {
	const env = refsData.env;
	const wrapsTemplate = /\bmw:Transclusion\b/.test(refsNode.getAttribute('typeof'));
	const dp = DOMDataUtils.getDataParsoid(refsNode);
	const group = dp.group || '';

	if (!wrapsTemplate) {
		let dataMw = DOMDataUtils.getDataMw(refsNode);
		if (Object.keys(dataMw).length === 0) {
			// FIXME: This can be moved to `insertMissingReferencesIntoDOM`
			console.assert(autoGenerated);
			dataMw = {
				'name': 'references',
				'attrs': {
					'group': group || undefined, // Dont emit empty keys
				},
			};
			DOMDataUtils.setDataMw(refsNode, dataMw);
		}

		if (autoGenerated) {
			// Flag it so html -> wt can skip it and clients can strip it
			// if necessary.
			dataMw.autoGenerated = true;
		} else if (nestedRefsHTML.length > 0) {
			dataMw.body = { 'html': '\n' + nestedRefsHTML.join('') };
		} else if (dp.selfClose) {
			dataMw.body = undefined;
		} else {
			dataMw.body = { 'html': '' };
		}

		dp.selfClose = undefined;
	}

	const refGroup = refsData.getRefGroup(group);

	// Deal with responsive wrapper: the actual list is its first child
	let list = refsNode;
	if (refsNode.classList.contains('mw-references-wrap')) {
		const rrOpts = env.conf.wiki.responsiveReferences;
		if (refGroup && refGroup.refs.length > rrOpts.threshold) {
			refsNode.classList.add('mw-references-columns');
		}
		list = refsNode.firstChild;
	}

	// Empty the list before rendering.
	//
	// Ex: When {{Reflist}} is reused from the cache, it comes with
	// a bunch of references as well. Those cached references have to go
	// before fresh ones are generated.
	for (let stale = list.firstChild; stale; stale = list.firstChild) {
		list.removeChild(stale);
	}

	if (refGroup) {
		refGroup.refs.forEach(function(ref) {
			refGroup.renderLine(env, list, ref);
		});
	}

	// Remove the group from refsData
	refsData.removeRefGroup(group);
};
/**
 * Handles the `<ref>`s still pending once the whole DOM has been processed,
 * as though an implicit `<references />` tag for each remaining group sat
 * at the end of the DOM.
 *
 * @param {ReferencesData} refsData Page-level ref state holding the groups
 *   that have not been rendered yet.
 * @param {Node} node Node to which the generated lists are appended.
 */
References.prototype.insertMissingReferencesIntoDOM = function(refsData, node) {
	const env = refsData.env;
	const doc = node.ownerDocument;

	// These lists come out of "nowhere", so to make selser work properly
	// they get a zero-sized DSR pointing to the end of the document.
	const pinDsrToEnd = function(dp) {
		dp.dsr = [env.page.src.length, env.page.src.length, 0, 0];
	};

	for (const groupName of refsData.refGroups.keys()) {
		const frag = createReferences(env, doc, null, {
			group: groupName,
			responsive: null,
		}, pinDsrToEnd, true);

		// A \n goes in front of the list so that, when serialized to
		// wikitext, each <references /> tag lands on its own line.
		node.appendChild(doc.createTextNode("\n"));
		node.appendChild(frag);

		this.insertReferencesIntoDOM(frag, refsData, [""], true);
	}
};
/**
 * html -> wt serialization handler for `<references>` nodes.
 *
 * `handle` resolves to the wikitext for the node; `before` supplies the
 * newline constraints that apply in front of it.
 */
References.prototype.serialHandler = {
	handle: Promise.async(function *(node, state, wrapperUnmodified) {
		const dmw = DOMDataUtils.getDataMw(node);
		if (dmw.autoGenerated && state.rtTestMode) {
			// Eliminate auto-inserted <references /> noise in rt-testing
			return '';
		}

		const openTag = yield state.serializer.serializeExtensionStartTag(node, state);
		if (!dmw.body) {
			return openTag;  // We self-closed this already.
		}
		if (typeof dmw.body.html !== 'string') {
			state.env.log('error',
				'References body unavailable for: ' + node.outerHTML);
			return '';  // Drop it!
		}

		const bodySrc = yield state.serializer.serializeHTML({
			env: state.env,
			extName: dmw.name,
		}, dmw.body.html);
		return openTag + bodySrc + '</' + dmw.name + '>';
	}),
	// FIXME: LEAKY -- Should we expose newline constraints to extensions?
	before: function(node, otherNode, state) {
		// Only newly added references tags are forced onto a new line.
		return WTUtils.isNewElt(node) ? { min: 1, max: 2 } : null;
	},
};
/**
 * Lint handler for `<references>` nodes; it performs no linting and only
 * tells the caller where to continue.
 *
 * @param {Node} refs The references node.
 * @param {Object} env Unused.
 * @param {Object} tplInfo Unused.
 * @param {Function} domLinter Unused.
 * @return {Node} `refs.nextNode`.
 */
References.prototype.lintHandler = function(refs, env, tplInfo, domLinter) {
	// Nothing to do
	//
	// FIXME: That is not quite right when the <ref> tags are defined in a
	// references section that is itself templated.
	//
	// {{1x|<references>\n<ref name='x'><b>foo</ref>\n</references>}}
	//
	// Here the references tag has the right tplInfo, but by the time the
	// <ref> tag is processed in the body of the article where it is
	// accessed, no relevant template or dsr info is available.
	//
	// Ignoring for now.
	const resumeAt = refs.nextNode;
	return resumeAt;
};
/**
 * Walks the subtree under a `<references>` node and extracts every sealed
 * `ref` fragment found in it. This handles wikitext like this:
 * ```
 *   <references> <ref>foo</ref> </references>
 *   <references> <ref>bar</ref> </references>
 * ```
 * Sealed ref fragments are handed to `extractRefFromNode` and not descended
 * into; other elements with children are searched recursively.
 *
 * @private
 */
var _processRefsInReferences = function(cite, refsData, node, referencesId,
		referencesGroup, nestedRefsHTML) {
	let upcoming;
	for (let current = node.firstChild; current !== null; current = upcoming) {
		// Grab the sibling first, before the current node is processed.
		upcoming = current.nextSibling;
		if (!DOMUtils.isElt(current)) {
			continue;
		}
		if (WTUtils.isSealedFragmentOfType(current, 'ref')) {
			cite.references.extractRefFromNode(current, refsData, cite,
				referencesId, referencesGroup, nestedRefsHTML);
			continue;
		}
		if (current.hasChildNodes()) {
			_processRefsInReferences(cite, refsData,
				current, referencesId, referencesGroup, nestedRefsHTML);
		}
	}
};
/**
 * Walks the element children of `node` in document order and processes
 * everything cite-related:
 *
 * - sealed `ref` fragments are handed to `extractRefFromNode`;
 * - `mw:Extension/references` nodes have the refs defined inside them
 *   extracted and are then filled via `insertReferencesIntoDOM`;
 * - any other element is searched recursively; for inline media the caption
 *   HTML stored in data-mw is processed as well.
 *
 * @param {Object} env Environment, used to parse caption HTML.
 * @param {Object} cite Extension object; its `references` handler does the
 *   actual extraction and insertion.
 * @param {ReferencesData} refsData Page-level ref state.
 * @param {Node} node Root whose children are processed.
 */
_processRefs = function(env, cite, refsData, node) {
	let child = node.firstChild;
	while (child !== null) {
		// Grab the sibling up front: processing may replace `child`.
		const nextChild = child.nextSibling;
		if (DOMUtils.isElt(child)) {
			if (WTUtils.isSealedFragmentOfType(child, 'ref')) {
				cite.references.extractRefFromNode(child, refsData, cite);
			} else if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(child.getAttribute('typeof') || '')) {
				// NOTE: the attribute name is spelled in lower case like
				// everywhere else in this file, rather than relying on the
				// DOM implementation to normalise 'typeOf'.
				const referencesId = child.getAttribute("about");
				const referencesGroup = DOMDataUtils.getDataParsoid(child).group;
				const nestedRefsHTML = [];
				_processRefsInReferences(cite, refsData,
					child, referencesId, referencesGroup, nestedRefsHTML);
				cite.references.insertReferencesIntoDOM(child, refsData, nestedRefsHTML);
			} else {
				// inline media -- look inside the data-mw attribute
				if (WTUtils.isInlineMedia(child)) {
					/* -----------------------------------------------------------------
					 * FIXME(subbu): This works but feels very special-cased in 2 ways:
					 *
					 * 1. special cased to images vs. any node that might have
					 *    serialized HTML embedded in data-mw
					 * 2. special cased to global cite handling -- the general scenario
					 *    is DOM post-processors that do different things on the
					 *    top-level vs not.
					 *    - Cite needs to process these fragments in the context of the
					 *      top-level page, and has to be done in order of how the nodes
					 *      are encountered.
					 *    - DOM cleanup can be done on embedded fragments without
					 *      any page-level context and in any order.
					 *    - So, some variability here.
					 *
					 * We should be running dom.cleanup.js passes on embedded html
					 * in data-mw and other attributes. Since correctness doesn't
					 * depend on that cleanup, I am not adding more special-case
					 * code in dom.cleanup.js.
					 *
					 * Doing this more generically will require creating a DOMProcessor
					 * class and adding state to it.
					 *
					 * See T214994
					 * ----------------------------------------------------------------- */
					const dmw = DOMDataUtils.getDataMw(child);
					const caption = dmw.caption;
					if (caption) {
						// Extract the caption HTML, build the DOM, process refs,
						// serialize to HTML, update the caption HTML.
						const captionDOM = ContentUtils.ppToDOM(env, caption);
						_processRefs(env, cite, refsData, captionDOM);
						dmw.caption = ContentUtils.ppToXML(captionDOM, { innerXML: true });
					}
				}
				if (child.hasChildNodes()) {
					_processRefs(env, cite, refsData, child);
				}
			}
		}
		child = nextChild;
	}
};
/**
 * Native Parsoid implementation of the Cite extension; it owns the `<ref>`
 * and `<references>` handlers and exposes them through `this.config`.
 *
 * All function-valued config entries that need the handler as receiver are
 * thin wrappers, so the handler methods are looked up at call time.
 */
var Cite = function() {
	this.ref = new Ref(this);
	this.references = new References(this);

	const refTag = {
		name: 'ref',
		toDOM: (...args) => this.ref.toDOM(...args),
		fragmentOptions: {
			unwrapFragment: false,
		},
		serialHandler: this.ref.serialHandler, // FIXME: Rename to toWikitext
		lintHandler: this.ref.lintHandler,
		// FIXME: Do we need (a) domDiffHandler (b) ... others ...
	};

	const referencesTag = {
		name: 'references',
		toDOM: (...args) => this.references.toDOM(...args),
		serialHandler: this.references.serialHandler,
		lintHandler: this.references.lintHandler,
	};

	const domProcessors = {
		wt2htmlPostProcessor: (...args) => this._wt2htmlPostProcessor(...args),
		html2wtPreProcessor: (...args) => this._html2wtPreProcessor(...args),
	};

	this.config = {
		name: 'cite',
		domProcessors: domProcessors,
		tags: [refTag, referencesTag],
		styles: [
			'ext.cite.style',
			'ext.cite.styles',
		],
	};
};
/**
 * wt -> html DOM PostProcessor.
 *
 * Does nothing unless invoked at the top level. There, it processes all
 * refs and references under `body` with fresh page-level state and then
 * appends lists for any refs that were left without a references tag.
 *
 * @param {Node} body Root of the DOM to process.
 * @param {Object} env Environment handed to the ref processing.
 * @param {Object} options Unused.
 * @param {boolean} atTopLevel Whether this is the top-level document.
 */
Cite.prototype._wt2htmlPostProcessor = function(body, env, options, atTopLevel) {
	if (!atTopLevel) {
		return;
	}
	const pageRefs = new ReferencesData(env);
	_processRefs(env, this, pageRefs, body);
	this.references.insertMissingReferencesIntoDOM(pageRefs, body);
};
/**
 * html -> wt DOM PreProcessor
 *
 * This is to reconstitute page-level information from local annotations
 * left behind by editing clients.
 *
 * Editing clients add inserted: true or deleted: true properties to a <ref>'s
 * data-mw object. These are no-ops for non-named <ref>s. For named <ref>s,
 * - for inserted refs, we might want to de-duplicate refs.
 * - for deleted refs, if the primary ref was deleted, we have to transfer
 * the primary ref designation to another instance of the named ref.
 *
 * Not implemented yet: the body is empty, so none of the above currently
 * happens.
 *
 * @param {Object} env Currently unused.
 * @param {Node} body Currently unused.
 */
Cite.prototype._html2wtPreProcessor = function(env, body) {
// TODO
};
// Export the extension constructor, but only when a CommonJS-style `module`
// object is available.
if (typeof module === "object") {
module.exports = Cite;
}