Flatten ext/

Change-Id: I083dc0d7c5ab37c8f0f5c051e82e4705c891812d
This commit is contained in:
Arlo Breault 2016-01-13 15:34:40 -08:00
parent 1c10f71b47
commit 41756cb70b

655
lib/ext/Cite.js Normal file
View file

@ -0,0 +1,655 @@
/* ----------------------------------------------------------------------
* This file implements <ref> and <references> extension tag handling
* natively in Parsoid.
* ---------------------------------------------------------------------- */
'use strict';
require('../../core-upgrade.js');
var entities = require('entities');
var Util = require('../utils/Util.js').Util;
var DU = require('../utils/DOMUtils.js').DOMUtils;
var Promise = require('../utils/promise.js');
var defines = require('../wt2html/parser.defines.js');
// define some constructor shortcuts
var KV = defines.KV;
var SelfclosingTagTk = defines.SelfclosingTagTk;
/**
 * Token-transform handler object for the <ref> extension tag.
 *
 * @class
 * @constructor
 * @param {Object} cite The object that creates this handler (the Cite
 *   constructor passes itself).
 */
function Ref(cite) {
	// Back-reference to the creating object.
	this.cite = cite;
}
/**
 * Report whether any element below `node` (at any depth) carries the
 * `mw:Extension/ref/Marker` type in its `typeof` attribute.
 * `node` itself is not tested; non-element children are skipped.
 *
 * @param {Node} node Root of the subtree to search.
 * @return {boolean}
 */
function hasRef(node) {
	for (var child = node.firstChild; child; child = child.nextSibling) {
		if (!DU.isElt(child)) {
			continue;
		}
		var isMarker = (/(?:^|\s)mw:Extension\/ref\/Marker(?=$|\s)/)
			.test(child.getAttribute('typeof'));
		// Either the child is a marker itself, or one is nested inside it.
		if (isMarker || hasRef(child)) {
			return true;
		}
	}
	return false;
}
/**
 * Handle a <ref> token.
 *
 * The ref content is handed to Util.processExtSource; once that reports
 * back, a single <meta typeof="mw:Extension/ref/Marker"> token is emitted
 * whose data attribs carry the ref's group, name, serialized content and
 * a flag saying whether the content itself contains ref markers.
 *
 * @param {Object} manager Supplies `env.newAboutId()`.
 * @param {Object} pipelineOpts Options of the current pipeline; `inTemplate`
 *   and `extTag` are read.
 * @param {Object} refTok The <ref> token (`getAttribute('source')`,
 *   `getAttribute('options')` and `dataAttribs` are used).
 * @param {Function} cb Receives `{ tokens: [...] }`.
 */
Ref.prototype.tokenHandler = function(manager, pipelineOpts, refTok, cb) {
	// A <ref> nested directly in another <ref> is an error at the top
	// level (its source is emitted as-is), but is supported in templates.
	var nestedAtTopLevel = pipelineOpts.extTag === "ref" && !pipelineOpts.inTemplate;
	if (nestedAtTopLevel) {
		cb({ tokens: [refTok.getAttribute("source")] });
		return;
	}

	var parsedOpts = Util.KVtoHash(refTok.getAttribute("options"), true);
	var refOpts = Object.assign({ name: null, group: null }, parsedOpts);
	var about = manager.env.newAboutId();

	// Append the marker meta (carrying the ref content) to `toks`
	// and hand everything to the callback.
	function emitMarker(toks, contentBody) {
		var da = Util.clone(refTok.dataAttribs);
		// Clear stx='html' so that sanitizer doesn't barf
		da.stx = undefined;
		da.group = refOpts.group || '';
		da.name = refOpts.name || '';
		if (contentBody) {
			da.content = DU.serializeChildren(contentBody);
			da.hasRefInRef = hasRef(contentBody);
		} else {
			da.content = '';
			da.hasRefInRef = false;
		}

		var marker = new SelfclosingTagTk('meta', [
			new KV('typeof', 'mw:Extension/ref/Marker'),
			new KV('about', about),
		], da);
		toks.push(marker);

		// All done!
		cb({ tokens: toks, async: false });
	}

	Util.processExtSource(manager, refTok, {
		// Full pipeline for processing ref-content
		pipelineType: 'text/x-mediawiki/full',
		pipelineOpts: {
			extTag: "ref",
			inTemplate: pipelineOpts.inTemplate,
			noPre: true,
			noPWrapping: true,
		},
		res: [],
		parentCB: cb,
		emptyContentCB: emitMarker,
		documentCB: function(refContentDoc) {
			emitMarker([], refContentDoc.body);
		},
	});
};
/**
 * Helper class used by the <references> implementation: the refs
 * collected for one group.
 *
 * @class
 * @constructor
 * @param {string} [group] Group name; any falsy value becomes ''.
 */
function RefGroup(group) {
	this.name = group ? group : '';
	// Ref records belonging to this group.
	this.refs = [];
	// Lookup of ref records by ref name.
	this.indexByName = new Map();
}
/**
 * Turn a ref name into the form used inside generated id attributes.
 *
 * HTML entities are decoded, every whitespace character becomes '_',
 * the result is URI-component encoded and each '%' is replaced by '.'.
 *
 * @param {string|null|undefined} val The ref name; a missing name
 *   (null/undefined) yields ''.
 * @return {string}
 */
function makeValidIdAttr(val) {
	// An absent name means "unnamed ref". Return '' rather than handing a
	// non-string to the entity decoder, which would either throw or turn
	// the value into a literal name such as "undefined".
	if (val === null || val === undefined) {
		return '';
	}
	// Looks like Cite.php doesn't try to fix ids that already have
	// a "_" in them. Ex: name="a b" and name="a_b" are considered
	// identical. Not sure if this is a feature or a bug.
	// It also considers entities equal to their encoding (i.e. '&' === '&amp;')
	// and then substitutes % with .
	var v = entities.decodeHTML(String(val)).replace(/\s/g, '_');
	return encodeURIComponent(v).replace(/%/g, ".");
}
/**
 * Build the <li> for one ref and append it to the references list.
 *
 * The item ends up with three children, in this order: the backlink(s)
 * to the ref's use sites, a single-space text node, and a span holding
 * the ref content.
 *
 * @param {Element} refsList List element that receives the new <li>.
 * @param {Object} ref Ref record; `target`, `content`, `id`, `group` and
 *   `linkbacks` are read.
 */
RefGroup.prototype.renderLine = function(refsList, ref) {
	var doc = refsList.ownerDocument;
	var noteId = ref.target;

	// Create the item and give it the ref content first, so the HTML
	// gets parsed; everything else is inserted in front of that content.
	var item = doc.createElement('li');
	DU.addAttributes(item, {
		'about': "#" + noteId,
		'id': noteId,
	});

	var textHolder = doc.createElement('span');
	DU.addAttributes(textHolder, {
		'id': "mw-reference-text-" + noteId,
		'class': "mw-reference-text",
	});
	textHolder.innerHTML = ref.content;
	item.appendChild(textHolder);

	// One <a><span class="mw-linkback-text">text </span></a> backlink.
	function makeLinkback(href, group, text) {
		var anchor = doc.createElement('a');
		var label = doc.createElement('span');
		anchor.setAttribute('href', href);
		label.setAttribute('class', 'mw-linkback-text');
		if (group) {
			anchor.setAttribute('data-mw-group', group);
		}
		label.appendChild(doc.createTextNode(text + " "));
		anchor.appendChild(label);
		return anchor;
	}

	// With exactly one linkback the anchor itself is the
	// 'mw:referencedBy' node; otherwise a wrapper span holds one
	// numbered anchor per linkback.
	var backlinks;
	if (ref.linkbacks.length === 1) {
		backlinks = makeLinkback('#' + ref.id, ref.group, '↑');
	} else {
		backlinks = doc.createElement('span');
		for (var i = 0; i < ref.linkbacks.length; i++) {
			backlinks.appendChild(makeLinkback('#' + ref.linkbacks[i], ref.group, i + 1));
		}
	}
	backlinks.setAttribute('rel', 'mw:referencedBy');
	item.insertBefore(backlinks, textHolder);

	// Space before content node
	item.insertBefore(doc.createTextNode(' '), textHolder);

	// Add it to the ref list
	refsList.appendChild(item);
};
/**
 * Bookkeeping for the refs seen so far: a running counter plus the ref
 * groups keyed by group name.
 *
 * @class
 * @constructor
 */
function ReferencesData() {
	// Count of ref records allocated so far; used when building ids.
	this.index = 0;
	// Group name ('' for the default group) => RefGroup.
	this.refGroups = new Map();
}
/**
 * Look up the RefGroup for a group name.
 *
 * @param {string} [groupName] Group name; any falsy value selects the
 *   default group ''.
 * @param {boolean} [allocIfMissing] When truthy, create and register the
 *   group if it does not exist yet.
 * @return {RefGroup|undefined} The group, or undefined if it is missing
 *   and was not allocated.
 */
ReferencesData.prototype.getRefGroup = function(groupName, allocIfMissing) {
	var key = groupName ? groupName : '';
	var groups = this.refGroups;
	if (allocIfMissing && !groups.has(key)) {
		groups.set(key, new RefGroup(key));
	}
	return groups.get(key);
};
/**
 * Forget a ref group.
 *
 * @param {string} groupName Name of the group to drop. null and undefined
 *   are ignored; '' is a valid name (the default group).
 */
ReferencesData.prototype.removeRefGroup = function(groupName) {
	if (groupName === null || groupName === undefined) {
		return;
	}
	this.refGroups.delete(groupName);
};
/**
 * Record one use of a ref and return its ref record.
 *
 * A named ref that already exists in the group is reused; otherwise a
 * new record is created and appended to the group (which is allocated
 * on demand).
 *
 * @param {string} groupName Group of the ref.
 * @param {string} refName Name of the ref; it is normalized with
 *   makeValidIdAttr before use.
 * @param {string} about About id stored on a newly created record.
 * @param {boolean} [skipLinkback] When truthy, no linkback id is added
 *   for this use.
 * @return {Object} The new or reused ref record.
 */
ReferencesData.prototype.add = function(groupName, refName, about, skipLinkback) {
	var group = this.getRefGroup(groupName, true);
	var name = makeValidIdAttr(refName);
	// Only named refs can be looked up; a miss yields undefined.
	var ref = name ? group.indexByName.get(name) : undefined;

	if (ref) {
		// A named ref that already has content is being used again.
		if (ref.content) {
			ref.hasMultiples = true;
		}
	} else {
		// The ids produced by Cite.php have some particulars:
		// Simple refs get 'cite_ref-' + index
		// Refs with names get 'cite_ref-' + name + '_' + index + (backlink num || 0)
		// Notes (references) whose ref doesn't have a name are 'cite_note-' + index
		// Notes whose ref has a name are 'cite_note-' + name + '-' + index
		var n = this.index;
		// bump index
		this.index = n + 1;

		var refKey = String(n + 1);
		var refIdBase = 'cite_ref-' + (name ? name + '_' + refKey : refKey);
		var noteId = 'cite_note-' + (name ? name + '-' + refKey : refKey);

		ref = {
			about: about,
			content: null,
			group: group.name,
			groupIndex: group.refs.length + 1,
			index: n,
			key: refIdBase,
			id: (name ? refIdBase + '-0' : refIdBase),
			linkbacks: [],
			name: name,
			target: noteId,
		};
		group.refs.push(ref);
		if (name) {
			group.indexByName.set(name, ref);
		}
	}

	if (!skipLinkback) {
		ref.linkbacks.push(ref.key + '-' + ref.linkbacks.length);
	}
	return ref;
};
/**
 * Handler object for the <references> extension tag.
 *
 * @class
 * @constructor
 * @param {Object} cite The object that creates this handler (the Cite
 *   constructor passes itself).
 */
function References(cite) {
	// Back-reference to the creating object.
	this.cite = cite;
}
/**
 * Handle a <references> token: process its content and emit DOM-fragment
 * tokens wrapping an <ol typeof="mw:Extension/references"> placeholder.
 *
 * @param {Object} manager Supplies `env` (used for `newAboutId()` and
 *   passed on to DU.buildDOMFragmentTokens).
 * @param {Object} pipelineOpts Options of the current pipeline;
 *   `wrapTemplates` and `inTemplate` are forwarded.
 * @param {Object} refsTok The <references> token.
 * @param {Function} cb Receives `{ async: false, tokens: [...] }`.
 */
References.prototype.tokenHandler = function(manager, pipelineOpts, refsTok, cb) {
	var env = manager.env;

	// group is the only recognized option?
	var parsedOpts = Util.KVtoHash(refsTok.getAttribute("options"), true);
	var refsOpts = Object.assign({ group: null }, parsedOpts);

	// Assign an about id and initialize the nested refs html
	var referencesId = env.newAboutId();

	// Emit a marker mw:DOMFragment for the references
	// token so that the dom post processor can generate
	// and emit references at this point in the DOM.
	function emitReferencesFragment(toks, refsBody) {
		var olHTML = [
			"<ol class='mw-references'",
			" typeof='mw:Extension/references'",
			" about='", referencesId, "'", ">",
			(refsBody || ""),
			"</ol>",
		].join('');

		// Records the source (and group, if any) on the generated <ol>.
		function olProcessor(ol) {
			var dp = DU.getDataParsoid(ol);
			dp.src = refsTok.getAttribute('source');
			if (refsOpts.group) {
				dp.group = refsOpts.group;
				ol.setAttribute('data-mw-group', refsOpts.group);
			}
			DU.storeDataParsoid(ol, dp);
		}

		var fragmentTokens = DU.buildDOMFragmentTokens(
			manager.env,
			refsTok,
			olHTML,
			olProcessor,
			// The <ol> HTML above is wrapper HTML added on and doesn't
			// have any DSR on it. We want DSR added to it.
			{ aboutId: referencesId, setDSR: true, isForeignContent: true }
		);
		cb({ async: false, tokens: fragmentTokens });
	}

	Util.processExtSource(manager, refsTok, {
		// The content goes through the 'text/x-mediawiki/full' pipeline
		// so that all embedded ref tags get processed.
		pipelineType: 'text/x-mediawiki/full',
		pipelineOpts: {
			// In order to associate ref-tags nested here with this references
			// object, we have to pass along the references id.
			extTag: "references",
			extTagId: referencesId,
			wrapTemplates: pipelineOpts.wrapTemplates,
			inTemplate: pipelineOpts.inTemplate,
		},
		res: [],
		parentCB: cb,
		emptyContentCB: emitReferencesFragment,
		endCB: emitReferencesFragment,
		documentCB: function(refsDoc) {
			emitReferencesFragment([], DU.serializeChildren(refsDoc.body));
		},
	});
};
/**
 * Turn one ref marker node into its rendered reference link.
 *
 * The ref is registered with `refsData`, and a
 * <span class="mw-ref" typeof="mw:Extension/ref"> containing a link to the
 * ref's note is built. When the marker is not nested in <references>, the
 * span is inserted into the DOM right before `node`; otherwise the
 * serialized span is appended to `refsInReferencesHTML` instead.
 *
 * @param {Element} node Marker element; its data-parsoid supplies group,
 *   name, content, hasRefInRef, src, dsr and pi.
 * @param {Object} refsData Ref registry; `add(group, name, about, skipLinkback)`
 *   is called on it.
 * @param {Function} refInRefProcessor Called with the parsed content body
 *   when the marker's data-parsoid has `hasRefInRef` set.
 * @param {string} [referencesAboutId] Only compared against undefined here:
 *   any other value marks the ref as nested inside <references>.
 * @param {string} [referencesGroup] Group used when the marker itself
 *   has none.
 * @param {Array} [refsInReferencesHTML] Receives the serialized span followed
 *   by '\n' for nested refs.
 */
References.prototype.extractRefFromNode = function(node, refsData,
refInRefProcessor, referencesAboutId, referencesGroup, refsInReferencesHTML) {
var nestedInReferences = referencesAboutId !== undefined;
var dp = DU.getDataParsoid(node);
// SSS FIXME: Need to clarify semantics here.
// If both the containing <references> elt as well as the nested <ref>
// elt has a group attribute, what takes precedence?
var group = dp.group || referencesGroup || '';
var refName = dp.name;
var about = node.getAttribute("about");
// Nested refs are registered without a linkback (4th argument).
var ref = refsData.add(group, refName, about, nestedInReferences);
// Whatever remains of the typeof once the marker type is stripped.
var nodeType = (node.getAttribute("typeof") || '').replace(/mw:Extension\/ref\/Marker/, '');
// Add ref-index linkback
var doc = node.ownerDocument;
var span = doc.createElement('span');
var content = dp.content;
var dataMW = Util.clone(DU.getDataMw(node));
var body;
if (dp.hasRefInRef) {
// Parse the content, run the processor over it, and serialize it again.
var html = DU.parseHTML(content).body;
refInRefProcessor(html);
// Save data attribs for the nested DOM
// since we are serializing it to a string.
DU.saveDataAttribsForDOM(html);
content = DU.serializeChildren(html);
}
if (content) {
// If there are multiple <ref>s with the same name, but different content,
// the content of the first <ref> shows up in the <references> section.
// in order to ensure lossless RT-ing for later <refs>, we have to record
// HTML inline for all of them.
if (ref.hasMultiples && content !== ref.content) {
body = { 'html': content };
} else {
body = { 'id': "mw-reference-text-" + ref.target };
}
}
// data-mw will not be empty in scenarios where the <ref> is also templated.
// In those cases, the transclusion markup takes precedence over the <ref> markup.
// So, we aren't updating data-mw.
if (!Object.keys(dataMW).length) {
dataMW = {
'name': 'ref',
// Dont set body if this is a reused reference
// like <ref name='..' /> with empty content.
'body': body,
'attrs': {
// 1. Use 'dp.group' (which is the group attribute that the ref node had)
// rather than use 'group' (which could be the group from an enclosing
// <references> tag).
// 2. Dont emit empty keys
'group': dp.group || undefined,
'name': refName || undefined,
},
};
}
// Named refs use their most recently added linkback id; unnamed refs use
// ref.id. Nested refs are given an undefined id.
DU.addAttributes(span, {
'about': about,
'class': 'mw-ref',
'id': nestedInReferences ? undefined :
(ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id),
'rel': 'dc:references',
'typeof': nodeType,
});
DU.addTypeOf(span, "mw:Extension/ref");
var dataParsoid = {
src: dp.src,
dsr: dp.dsr,
pi: dp.pi,
};
DU.setDataParsoid(span, dataParsoid);
DU.setDataMw(span, dataMW);
// refLink is the link to the citation
var refLink = doc.createElement('a');
DU.addAttributes(refLink, {
'href': '#' + ref.target,
'style': 'counter-reset: mw-Ref ' + ref.groupIndex + ';',
});
if (ref.group) {
refLink.setAttribute('data-mw-group', ref.group);
}
// refLink-span which will contain a default rendering of the cite link
// for browsers that don't support counters
var refLinkSpan = doc.createElement('span');
refLinkSpan.setAttribute('class', 'mw-reflink-text');
refLinkSpan.appendChild(doc.createTextNode("[" +
(ref.group? ref.group + " " : "") + ref.groupIndex + "]"));
refLink.appendChild(refLinkSpan);
span.appendChild(refLink);
if (!nestedInReferences) {
node.parentNode.insertBefore(span, node);
} else {
// The span is serialized to a string here rather than inserted,
// so its data attribs are stored on it first.
DU.storeDataParsoid(span, dataParsoid);
DU.storeDataMw(span, dataMW);
refsInReferencesHTML.push(DU.serializeNode(span).str, '\n');
}
// Keep the first content to compare multiple <ref>s with the same name.
if (!ref.content) {
ref.content = content;
}
};
/**
 * Fill a references list node with the refs collected for its group.
 *
 * Sets data-mw on the node if it has none yet, removes all existing
 * children, renders one line per ref of the group, and finally removes
 * the group from `refsData`.
 *
 * @param {Element} refsNode The references list element.
 * @param {Object} refsData Ref registry (`getRefGroup` / `removeRefGroup`).
 * @param {Array} refsInReferencesHTML HTML pieces of the refs that were
 *   defined inside the <references> tag; joined into data-mw.body.html.
 */
References.prototype.insertReferencesIntoDOM = function(refsNode, refsData, refsInReferencesHTML) {
	var dp = DU.getDataParsoid(refsNode);
	var group = dp.group || '';
	// fall back so we don't crash
	var src = dp.src || '<references/>';

	// Extract ext-source for <references>..</references> usage
	var body = Util.extractExtBody("references", src).trim();
	var refGroup = refsData.getRefGroup(group);

	if (Object.keys(DU.getDataMw(refsNode)).length === 0) {
		// We'll have to output data-mw.body.extsrc in
		// scenarios where original wikitext was of the form:
		// "<references> lot of refs here </references>"
		// Ex: See [[en:Barack Obama]]
		var hasBody = body.length > 0;
		DU.setDataMw(refsNode, {
			'name': 'references',
			'body': hasBody ? {
				'extsrc': body,
				'html': refsInReferencesHTML.join(''),
			} : undefined,
			'attrs': {
				// Dont emit empty keys
				'group': group || undefined,
			},
		});
	}

	// Remove all children from the references node
	//
	// Ex: When {{Reflist}} is reused from the cache, it comes with
	// a bunch of references as well. We have to remove all those cached
	// references before generating fresh references.
	var stale;
	while ((stale = refsNode.firstChild)) {
		refsNode.removeChild(stale);
	}

	if (refGroup) {
		refGroup.refs.forEach(function(ref) {
			refGroup.renderLine(refsNode, ref);
		});
	}

	// Remove the group from refsData
	refsData.removeRefGroup(group);
};
/**
 * Process <ref>s left behind after the DOM is fully processed, as if there
 * was an implicit <references /> tag at the end of the DOM.
 *
 * For every group still present in `refsData`, a newline text node and a
 * new references <ol> are appended to `node`, and the list is then filled
 * through insertReferencesIntoDOM.
 *
 * @param {Object} env Supplies `newAboutId()` and `page.src`.
 * @param {Object} refsData Ref registry; its `refGroups` map is iterated.
 * @param {Node} node Node that receives the generated lists.
 */
References.prototype.insertMissingReferencesIntoDOM = function(env, refsData, node) {
	var doc = node.ownerDocument;

	refsData.refGroups.forEach(function(unusedGroup, groupName) {
		var list = doc.createElement('ol');
		var listDp = DU.getDataParsoid(list);
		DU.addAttributes(list, {
			'class': 'mw-references',
			typeof: 'mw:Extension/references',
			about: env.newAboutId(),
		});

		// The new references come out of "nowhere", so to make selser work
		// properly, add a zero-sized DSR pointing to the end of the document.
		var eof = env.page.src.length;
		listDp.dsr = [eof, eof, 0, 0];
		listDp.autoInsertedRefs = true;

		if (groupName) {
			list.setAttribute('data-mw-group', groupName);
			listDp.group = groupName;
		}
		DU.storeDataParsoid(list, listDp);

		// Add a \n before the <ol> so that when serialized to wikitext,
		// each <references /> tag appears on its own line.
		node.appendChild(doc.createTextNode("\n"));
		node.appendChild(list);

		this.insertReferencesIntoDOM(list, refsData, [""]);
	}, this);
};
// Serialization hooks registered for the <references> tag.
var serialHandler = {
	// Serialize a references node. Auto-inserted references lists produce
	// no output while round-trip testing; everything else goes through
	// the serializer's default extension handler.
	handle: Promise.method(function(node, state, wrapperUnmodified) {
		var dp = DU.getDataParsoid(node);
		var dataMW = DU.getDataMw(node);
		if (dp.autoInsertedRefs && state.rtTestMode) {
			// Eliminate auto-inserted <references /> noise in rt-testing
			return Promise.resolve('');
		}
		return state.serializer.defaultExtensionHandler(node, dataMW);
	}),
	// Newline constraints before the node: new references tags are
	// serialized on a new line; no constraint otherwise.
	before: function(node, otherNode, state) {
		return DU.isNewElt(node) ? { min: 1, max: 2 } : null;
	},
};
/* --------------------------------------------
 * This handles wikitext like this:
 *
 * <references> <ref>foo</ref> </references>
 * <references> <ref>bar</ref> </references>
 * -------------------------------------------- */
// The two walkers below refer to each other, hence the forward declaration.
var _processRefs, _processRefsInReferences;

/**
 * Walk the subtree of a references node and extract every ref marker found
 * in it, passing along the enclosing references id and group.
 *
 * @param {Object} cite Provides `references.extractRefFromNode`.
 * @param {Object} refsData Ref registry handed through to the extractor.
 * @param {Node} node Node whose children are walked.
 * @param {string} referencesId About id of the enclosing references node.
 * @param {string} referencesGroup Group of the enclosing references node.
 * @param {Array} nestedRefsHTML Accumulator handed through to the extractor.
 */
_processRefsInReferences = function(cite, refsData, node, referencesId,
		referencesGroup, nestedRefsHTML) {
	var next;
	for (var child = node.firstChild; child !== null; child = next) {
		// Remember the sibling before the child gets processed.
		next = child.nextSibling;
		if (!DU.isElt(child)) {
			continue;
		}

		var isMarker = (/(?:^|\s)mw:Extension\/ref\/Marker(?=$|\s)/)
			.test(child.getAttribute('typeof'));
		if (isMarker) {
			cite.references.extractRefFromNode(child, refsData,
				_processRefs.bind(null, cite, refsData),
				referencesId, referencesGroup, nestedRefsHTML);
		} else if (child.childNodes.length > 0) {
			_processRefsInReferences(cite, refsData,
				child, referencesId, referencesGroup, nestedRefsHTML);
		}
	}
};
/**
 * Walk the children of `node` in document order and handle everything
 * related to Cite: ref markers are extracted, references nodes have their
 * nested refs processed and are then filled in, HTML embedded in
 * inline-image captions is processed as well, and every other element
 * with children is descended into.
 *
 * @param {Object} cite Provides `references.extractRefFromNode` and
 *   `references.insertReferencesIntoDOM`.
 * @param {Object} refsData Ref registry handed through to those methods.
 * @param {Node} node Node whose children are walked.
 */
_processRefs = function(cite, refsData, node) {
	var next;
	for (var child = node.firstChild; child !== null; child = next) {
		// Remember the sibling before the child gets processed.
		next = child.nextSibling;
		if (!DU.isElt(child)) {
			continue;
		}

		var typeOf = child.getAttribute('typeof');

		if ((/(?:^|\s)mw:Extension\/ref\/Marker(?=$|\s)/).test(typeOf)) {
			cite.references.extractRefFromNode(child, refsData,
				_processRefs.bind(null, cite, refsData));
			continue;
		}

		if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(typeOf)) {
			var referencesId = child.getAttribute("about");
			var referencesGroup = DU.getDataParsoid(child).group;
			var nestedRefsHTML = ["\n"];
			_processRefsInReferences(cite, refsData,
				child, referencesId, referencesGroup, nestedRefsHTML);
			cite.references.insertReferencesIntoDOM(child, refsData, nestedRefsHTML);
			continue;
		}

		// inline image -- look inside the data-mw attribute
		if (DU.isInlineImage(child)) {
			/* -----------------------------------------------------------------
			 * SSS FIXME: This works but feels very special-cased in 2 ways:
			 *
			 * 1. special cased to images vs. any node that might have
			 *    serialized HTML embedded in data-mw
			 * 2. special cased to global cite handling -- the general scenario
			 *    is DOM post-processors that do different things on the
			 *    top-level vs not.
			 *    - Cite needs to process these fragments in the context of the
			 *      top-level page, and has to be done in order of how the nodes
			 *      are encountered.
			 *    - DOM cleanup can be done on embedded fragments without
			 *      any page-level context and in any order.
			 *    - So, some variability here.
			 *
			 * We should be running dom.cleanup.js passes on embedded html
			 * in data-mw and other attributes. Since correctness doesn't
			 * depend on that cleanup, I am not adding more special-case
			 * code in dom.cleanup.js.
			 *
			 * Doing this more generically will require creating a DOMProcessor
			 * class and adding state to it.
			 * ----------------------------------------------------------------- */
			var dmw = DU.getDataMw(child);
			var caption = dmw.caption;
			if (caption) {
				// Extract the caption HTML, build the DOM, process refs,
				// save data attribs, serialize to HTML, update the caption HTML.
				var captionBody = DU.parseHTML(caption).body;
				_processRefs(cite, refsData, captionBody);
				DU.saveDataAttribsForDOM(captionBody);
				// FIXME: We do this in a lot of places with embedded HTML,
				// but, should we be running the XML-serializer on it?
				// Once again, this is a generic cleanup to be done unrelated
				// to this patch.
				dmw.caption = captionBody.innerHTML;
			}
		}

		if (child.childNodes.length > 0) {
			_processRefs(cite, refsData, child);
		}
	}
};
/**
 * Native Parsoid implementation of the Cite extension
 * that ties together <ref> and <references>.
 *
 * Creates the two tag handlers and exposes them, together with the DOM
 * post-processor, through `this.config`.
 *
 * @class
 * @constructor
 */
var Cite = function() {
	this.ref = new Ref(this);
	this.references = new References(this);

	// <ref>: token handler only.
	var refTag = {
		name: 'ref',
		tokenHandler: this.ref.tokenHandler.bind(this.ref),
	};
	// <references>: token handler plus serialization hooks.
	var referencesTag = {
		name: 'references',
		tokenHandler: this.references.tokenHandler.bind(this.references),
		serialHandler: serialHandler,
	};

	this.config = {
		domPostProcessor: this.domPostProcessor.bind(this),
		tags: [refTag, referencesTag],
	};
};
/**
 * DOM post-processor: at the top level only, process all refs below
 * `node` and then emit references lists for any refs still left over.
 *
 * @param {Node} node Root of the DOM to process.
 * @param {Object} env Passed on to insertMissingReferencesIntoDOM.
 * @param {Object} options Not used here.
 * @param {boolean} atTopLevel Nothing is done unless this is truthy.
 */
Cite.prototype.domPostProcessor = function(node, env, options, atTopLevel) {
	if (!atTopLevel) {
		return;
	}
	// A fresh registry for every top-level run.
	var refsData = new ReferencesData();
	_processRefs(this, refsData, node);
	this.references.insertMissingReferencesIntoDOM(env, refsData, node);
};
// Export the Cite constructor, but only where a `module` object exists.
if (typeof module === "object") {
module.exports.Cite = Cite;
}