Cite: Get rid of tokenHandler that relies on Parsoid internals

* This still exposes and exploits some Parsoid internals, but
  that can be fixed in the next round of updates.

* Cite (and any other extension) that wants to manage fragments
  on its own, instead of running the default fragment unpacking
  routine, can now specify that it doesn't want the content unwrapped.

* The parser test changes only reorder the attrs and body
  properties, which switched positions in data-mw.

* The blacklist changes show that the number of test failures
  has actually gone down.

Change-Id: I1e1a651e8f2d6d9456bb5849b0bce1f8a87c4446
This commit is contained in:
Subramanya Sastry 2018-06-08 23:37:17 -05:00 committed by jenkins-bot
parent 116d3c3744
commit 53ae5aac06

View file

@ -12,14 +12,8 @@ var ParsoidExtApi = module.parent.require('./extapi.js').versionCheck('^0.9.0');
var Util = ParsoidExtApi.Util;
var DU = ParsoidExtApi.DOMUtils;
var Promise = ParsoidExtApi.Promise;
var defines = ParsoidExtApi.defines;
var Sanitizer = module.parent.require('../wt2html/tt/Sanitizer.js').Sanitizer;
// define some constructor shortcuts
var KV = defines.KV;
var SelfclosingTagTk = defines.SelfclosingTagTk;
/**
* Simple token transform version of the Ref extension tag.
*
@ -33,8 +27,7 @@ function hasRef(node) {
var c = node.firstChild;
while (c) {
if (DU.isElt(c)) {
var typeOf = c.getAttribute('typeof');
if ((/(?:^|\s)mw:Extension\/ref\/Marker(?=$|\s)/).test(typeOf)) {
if (DU.isSealedFragmentOfType(c, 'ref')) {
return true;
}
if (hasRef(c)) {
@ -46,69 +39,29 @@ function hasRef(node) {
return false;
}
/**
* Handle ref tokens.
*/
Ref.prototype.tokenHandler = function(manager, pipelineOpts, refTok, cb) {
var refOpts = Object.assign({
name: null,
group: null,
}, Util.kvToHash(refTok.getAttribute("options"), true));
var finalCB = function(toks, contentBody) {
// Marker meta with ref content
var da = Util.clone(refTok.dataAttribs);
// Clear stx='html' so that sanitizer doesn't barf
da.stx = undefined;
da.group = refOpts.group || '';
da.name = refOpts.name || '';
if (contentBody) {
da.hasRefInRef = hasRef(contentBody);
DU.visitDOM(contentBody, DU.storeDataAttribs);
da.content = manager.env.setFragment(contentBody);
} else {
da.hasRefInRef = false;
da.content = '';
}
toks.push(new SelfclosingTagTk('meta', [
new KV('typeof', 'mw:Extension/ref/Marker'),
new KV('about', manager.env.newAboutId()),
], da));
// All done!
cb({ tokens: toks, async: false });
};
Ref.prototype.toDOM = function(state, content, args) {
// Drop nested refs entirely, unless we've explicitly allowed them
if (pipelineOpts.extTag === 'ref' && !pipelineOpts.allowNestedRef) {
cb({ tokens: [], async: false });
return;
if (state.parseContext.extTag === 'ref' && !state.parseContext.allowNestedRef) {
return null;
}
// The one supported case for nested refs is from the {{#tag:ref}} parser
// function. However, we're overly permissive here since we can't
// distinguish when that's nested in another template. The php
// preprocessor did our expansion.
var allowNestedRef = pipelineOpts.inTemplate && pipelineOpts.extTag !== 'ref';
// distinguish when that's nested in another template.
// The php preprocessor did our expansion.
const allowNestedRef = state.parseContext.inTemplate && state.parseContext.extTag !== 'ref';
Util.processExtSource(manager, refTok, {
// Full pipeline for processing ref-content
pipelineType: 'text/x-mediawiki/full',
pipelineOpts: {
extTag: "ref",
inTemplate: pipelineOpts.inTemplate,
allowNestedRef: allowNestedRef,
noPre: true,
noPWrapping: true,
},
res: [],
parentCB: cb,
emptyContentCB: finalCB,
documentCB: function(refContentDoc) {
finalCB([], refContentDoc.body);
},
return ParsoidExtApi.parseWikitextToDOM(state, args, '', content, {
// NOTE: sup's content model requires it only contain phrasing
// content, not flow content. However, since we are building an
// in-memory DOM which is simply a tree data structure, we can
// nest flow content in a <sup> tag.
wrapperTag: 'sup',
extTag: 'ref',
inTemplate: state.parseContext.inTemplate,
allowNestedRef: allowNestedRef,
noPWrapping: true,
noPre: true,
});
};
@ -229,7 +182,7 @@ RefGroup.prototype.renderLine = function(env, refsList, ref) {
'class': "mw-reference-text",
});
if (ref.content) {
var content = env.fragmentMap.get(ref.content);
var content = env.fragmentMap.get(ref.content)[0];
DU.migrateChildrenBetweenDocs(content, reftextSpan);
DU.visitDOM(reftextSpan, DU.loadDataAttribs);
}
@ -306,7 +259,7 @@ ReferencesData.prototype.add = function(env, groupName, refName, about, skipLink
ref.hasMultiples = true;
// Use the non-pp version here since we've already stored attribs
// before putting them in the map.
ref.cachedHtml = DU.toXML(env.fragmentMap.get(ref.content), { innerXML: true });
ref.cachedHtml = DU.toXML(env.fragmentMap.get(ref.content)[0], { innerXML: true });
}
} else {
// The ids produced Cite.php have some particulars:
@ -358,7 +311,7 @@ function References(cite) {
var dummyDoc = domino.createDocument();
var createReferences = function(env, body, about, refsOpts, modifyDp) {
var createReferences = function(env, body, refsOpts, modifyDp, autoGenerated) {
var doc = body ? body.ownerDocument : dummyDoc;
var ol = doc.createElement('ol');
@ -386,10 +339,12 @@ var createReferences = function(env, body, about, refsOpts, modifyDp) {
frag = ol;
}
DU.addAttributes(frag, {
typeof: 'mw:Extension/references',
about: about,
});
if (autoGenerated) {
DU.addAttributes(frag, {
typeof: 'mw:Extension/references',
about: env.newAboutId(),
});
}
var dp = DU.getDataParsoid(frag);
if (refsOpts.group) { // No group for the empty string either
@ -407,63 +362,25 @@ var createReferences = function(env, body, about, refsOpts, modifyDp) {
return frag;
};
/**
* Sanitize the references tag and convert it into a meta-token.
*/
References.prototype.tokenHandler = function(manager, pipelineOpts, refsTok, cb) {
var env = manager.env;
var kvs = refsTok.getAttribute('options');
var opts = Util.kvToHash(kvs, true);
References.prototype.toDOM = function(state, content, args) {
return ParsoidExtApi.parseWikitextToDOM(state, args, '', content, {
wrapperTag: 'div',
extTag: 'references',
inTemplate: state.parseContext.inTemplate,
noPWrapping: true,
noPre: true,
}).then(function(doc) {
var refsOpts = Object.assign({
group: null,
responsive: null,
}, Util.kvToHash(args, true));
var refsOpts = Object.assign({
group: null,
responsive: null,
}, opts);
// Assign an about id and initialize the nested refs html
var referencesId = env.newAboutId();
// Emit a marker mw:DOMFragment for the references
// token so that the dom post processor can generate
// and emit references at this point in the DOM.
var emitReferencesFragment = function(toks, body) {
var frag = createReferences(env, body, referencesId, refsOpts, function(dp) {
dp.src = refsTok.getAttribute('source');
var frag = createReferences(state.manager.env, doc.body, refsOpts, function(dp) {
dp.src = state.extToken.getAttribute('source');
});
if (!body) { body = dummyDoc.createElement('body'); }
body.appendChild(frag);
cb({
async: false,
tokens: DU.buildDOMFragmentTokens(env, refsTok, body, null, {
aboutId: referencesId,
isForeignContent: true,
// The <ol> HTML above is wrapper HTML added on and doesn't
// have any DSR on it. We want DSR added to it.
setDSR: true,
}),
});
};
doc.body.appendChild(frag);
Util.processExtSource(manager, refsTok, {
// Partial pipeline for processing ref-content
// Expand till stage 2 so that all embedded
// ref tags get processed
pipelineType: 'text/x-mediawiki/full',
pipelineOpts: {
// In order to associate ref-tags nested here with this references
// object, we have to pass along the references id.
extTag: "references",
extTagId: referencesId,
wrapTemplates: pipelineOpts.wrapTemplates,
inTemplate: pipelineOpts.inTemplate,
},
res: [],
parentCB: cb,
emptyContentCB: emitReferencesFragment,
endCB: emitReferencesFragment,
documentCB: function(refsDoc) {
emitReferencesFragment([], refsDoc.body);
},
return doc;
});
};
@ -473,31 +390,45 @@ References.prototype.extractRefFromNode = function(node, refsData, cite,
referencesAboutId, referencesGroup, nestedRefsHTML) {
var env = refsData.env;
var nestedInReferences = referencesAboutId !== undefined;
var isTplWrapper = /\bmw:Transclusion\b/.test(node.getAttribute('typeof'));
var tplDmw;
var dp = DU.getDataParsoid(node);
var refDmw = Util.clone(DU.getDataMw(node));
if (isTplWrapper) {
tplDmw = refDmw;
refDmw = dp.nestedDmw;
}
// SSS FIXME: Need to clarify semantics here.
// If both the containing <references> elt as well as the nested <ref>
// elt has a group attribute, what takes precedence?
var group = dp.group || referencesGroup || '';
var refName = dp.name;
var group = refDmw.attrs.group || referencesGroup || '';
var refName = refDmw.attrs.name || '';
var about = node.getAttribute("about");
var ref = refsData.add(env, group, refName, about, nestedInReferences);
var nodeType = (node.getAttribute("typeof") || '').replace(/mw:Extension\/ref\/Marker/, '');
var nodeType = (node.getAttribute("typeof") || '').replace(/mw:DOMFragment\/sealed\/ref/, '');
// Add ref-index linkback
var doc = node.ownerDocument;
var linkBack = doc.createElement('sup');
var content = dp.content;
var dataMw = Util.clone(DU.getDataMw(node));
var body;
var content = dp.html;
if (dp.hasRefInRef) {
var c = env.fragmentMap.get(content);
DU.visitDOM(c, DU.loadDataAttribs);
_processRefs(cite, refsData, c);
var c = env.fragmentMap.get(content)[0];
DU.visitDOM(c, DU.loadDataAttribs); // FIXME: Lot of useless work for an edge case
if (DU.getDataParsoid(c).empty) {
// Discard wrapper if there was no input wikitext
content = null;
// Setting to null seems unnecessary.
// undefined might be sufficient.
// But, can be cleaned up separately.
refDmw.body = null;
} else {
if (hasRef(c)) { // nested ref-in-ref
_processRefs(cite, refsData, c);
}
DU.visitDOM(c, DU.storeDataAttribs);
}
if (content) {
// If there are multiple <ref>s with the same name, but different content,
// the content of the first <ref> shows up in the <references> section.
// in order to ensure lossless RT-ing for later <refs>, we have to record
@ -507,36 +438,16 @@ References.prototype.extractRefFromNode = function(node, refsData, cite,
if (ref.hasMultiples) {
// Use the non-pp version here since we've already stored attribs
// before putting them in the map.
html = DU.toXML(env.fragmentMap.get(content), { innerXML: true });
html = DU.toXML(env.fragmentMap.get(content)[0], { innerXML: true });
contentDiffers = html !== ref.cachedHtml;
}
if (contentDiffers) {
body = { 'html': html };
refDmw.body = { 'html': html };
} else {
body = { 'id': "mw-reference-text-" + ref.target };
refDmw.body = { 'id': "mw-reference-text-" + ref.target };
}
}
// data-mw will not be empty in scenarios where the <ref> is also templated.
// In those cases, the transclusion markup takes precedence over the <ref> markup.
// So, we aren't updating data-mw.
if (!Object.keys(dataMw).length) {
dataMw = {
'name': 'ref',
// Dont set body if this is a reused reference
// like <ref name='..' /> with empty content.
'body': body,
'attrs': {
// 1. Use 'dp.group' (which is the group attribute that the ref node had)
// rather than use 'group' (which could be the group from an enclosing
// <references> tag).
// 2. Dont emit empty keys
'group': dp.group || undefined,
'name': refName || undefined,
},
};
}
DU.addAttributes(linkBack, {
'about': about,
'class': 'mw-ref',
@ -552,7 +463,11 @@ References.prototype.extractRefFromNode = function(node, refsData, cite,
pi: dp.pi,
};
DU.setDataParsoid(linkBack, dataParsoid);
DU.setDataMw(linkBack, dataMw);
if (isTplWrapper) {
DU.setDataMw(linkBack, tplDmw);
} else {
DU.setDataMw(linkBack, refDmw);
}
// refLink is the link to the citation
var refLink = doc.createElement('a');
@ -589,33 +504,37 @@ References.prototype.extractRefFromNode = function(node, refsData, cite,
References.prototype.insertReferencesIntoDOM = function(refsNode, refsData, nestedRefsHTML, autoGenerated) {
var env = refsData.env;
var isTplWrapper = /\bmw:Transclusion\b/.test(refsNode.getAttribute('typeof'));
var dp = DU.getDataParsoid(refsNode);
var group = dp.group || '';
if (!isTplWrapper) {
var dataMw = DU.getDataMw(refsNode);
if (!Object.keys(dataMw).length) {
dataMw = {
'name': 'references',
'attrs': {
'group': group || undefined, // Dont emit empty keys
},
};
DU.setDataMw(refsNode, dataMw);
}
dataMw.attrs.responsive = dp.tmp.responsive; // Rt the `responsive` parameter
var dataMw = DU.getDataMw(refsNode);
if (!Object.keys(dataMw).length) {
dataMw = {
'name': 'references',
'attrs': {
// Dont emit empty keys
'group': group || undefined,
// Rt the `responsive` parameter
responsive: dp.tmp.responsive,
},
};
// Mark this auto-generated so that we can skip this during
// html -> wt and so that clients can strip it if necessary.
if (autoGenerated) {
dataMw.autoGenerated = true;
} else if (nestedRefsHTML.length > 0) {
dataMw.body = { 'html': '\n' + nestedRefsHTML.join('') };
} else if (dataMw.body === null) {
// Is this correct?
dataMw.body = undefined;
}
DU.setDataMw(refsNode, dataMw);
}
var refGroup = refsData.getRefGroup(group);
// Deal with responsive warpper
// Deal with responsive wrapper
if (refsNode.classList.contains('mw-references-wrap')) {
var rrOpts = env.conf.wiki.responsiveReferences;
if (refGroup && refGroup.refs.length > rrOpts.threshold) {
@ -651,14 +570,14 @@ References.prototype.insertMissingReferencesIntoDOM = function(refsData, node) {
var doc = node.ownerDocument;
refsData.refGroups.forEach((refsValue, refsGroup) => {
var frag = createReferences(env, null, env.newAboutId(), {
var frag = createReferences(env, null, {
group: refsGroup,
responsive: null,
}, function(dp) {
// The new references come out of "nowhere", so to make selser work
// properly, add a zero-sized DSR pointing to the end of the document.
dp.dsr = [env.page.src.length, env.page.src.length, 0, 0];
});
}, true);
// Add a \n before the <ol> so that when serialized to wikitext,
// each <references /> tag appears on its own line.
@ -732,8 +651,7 @@ var _processRefsInReferences = function(cite, refsData, node, referencesId,
while (child !== null) {
var nextChild = child.nextSibling;
if (DU.isElt(child)) {
var typeOf = child.getAttribute('typeof');
if ((/(?:^|\s)mw:Extension\/ref\/Marker(?=$|\s)/).test(typeOf)) {
if (DU.isSealedFragmentOfType(child, 'ref')) {
cite.references.extractRefFromNode(child, refsData, cite,
referencesId, referencesGroup, nestedRefsHTML);
} else if (child.hasChildNodes()) {
@ -750,10 +668,9 @@ _processRefs = function(cite, refsData, node) {
while (child !== null) {
var nextChild = child.nextSibling;
if (DU.isElt(child)) {
var typeOf = child.getAttribute('typeof');
if ((/(?:^|\s)mw:Extension\/ref\/Marker(?=$|\s)/).test(typeOf)) {
if (DU.isSealedFragmentOfType(child, 'ref')) {
cite.references.extractRefFromNode(child, refsData, cite);
} else if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(typeOf)) {
} else if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(child.getAttribute('typeOf'))) {
var referencesId = child.getAttribute("about");
var referencesGroup = DU.getDataParsoid(child).group;
var nestedRefsHTML = [];
@ -820,12 +737,13 @@ var Cite = function() {
tags: [
{
name: 'ref',
tokenHandler: this.ref.tokenHandler.bind(this.ref),
toDOM: this.ref.toDOM.bind(this.ref),
unwrapContent: false,
serialHandler: this.ref.serialHandler,
lintHandler: this.ref.lintHandler,
}, {
name: 'references',
tokenHandler: this.references.tokenHandler.bind(this.references),
toDOM: this.references.toDOM.bind(this.ref),
serialHandler: this.references.serialHandler,
lintHandler: this.references.lintHandler,
},