mediawiki-extensions-Cite/lib/ext/Cite/References.js

'use strict';

const ParsoidExtApi = module.parent.parent.require('./extapi.js').versionCheck('^0.10.0');
const { ContentUtils, DOMDataUtils, DOMUtils, TokenUtils, WTUtils, Promise } = ParsoidExtApi;

/**
 * Static helpers for the `<references>` extension tag: they build the list
 * DOM, turn sealed `<ref>` fragments into linkbacks, and fill each list from
 * the refs recorded in `refsData`.
 *
 * @class
 */
class References {
	/**
	 * Depth-first search for a sealed `ref` fragment among the element
	 * descendants of `node`. Non-element children are skipped.
	 *
	 * @param {Node} node
	 * @return {boolean}
	 */
	static hasRef(node) {
		for (let kid = node.firstChild; kid; kid = kid.nextSibling) {
			if (!DOMUtils.isElt(kid)) {
				continue;
			}
			if (WTUtils.isSealedFragmentOfType(kid, 'ref') || References.hasRef(kid)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Parse the tag's content into a document and append the references
	 * list (built from that document's body) to the body.
	 *
	 * @param {Object} state
	 * @param {string} content
	 * @param {Array} args
	 * @return {Promise} Resolves with the parsed document.
	 */
	static toDOM(state, content, args) {
		const parsed = ParsoidExtApi.parseTokenContentsToDOM(state, args, '', content, {
			wrapperTag: 'div',
			pipelineOpts: {
				extTag: 'references',
				inTemplate: state.parseContext.inTemplate,
			},
		});

		return parsed.then((doc) => {
			const refsOpts = Object.assign(
				{ group: null, responsive: null },
				TokenUtils.kvToHash(args, true)
			);

			const stampDp = (dp) => {
				const tok = state.extToken;
				dp.src = tok.hasAttribute('source') ? tok.getAttribute('source') : null;
				// Duplicates what is on doc.body.firstChild, but is easier to reach here
				dp.selfClose = tok.dataAttribs.selfClose;
			};

			const frag = References.createReferences(state.env, doc, doc.body, refsOpts, stampDp);
			doc.body.appendChild(frag);

			return doc;
		});
	}

	/**
	 * Build the `<ol>` for a references list, optionally wrapped in a
	 * responsive `<div>`, and return the outermost element.
	 *
	 * @param {Object} env
	 * @param {Document} doc
	 * @param {Node|null} body Children are migrated into the list when given.
	 * @param {Object} refsOpts `group` and `responsive` options.
	 * @param {Function} [modifyDp] Called with the result's data-parsoid.
	 * @param {boolean} [autoGenerated] Adds `typeof`/`about` when truthy.
	 * @return {Element}
	 */
	static createReferences(env, doc, body, refsOpts, modifyDp, autoGenerated) {
		const list = doc.createElement('ol');
		list.classList.add('mw-references');
		list.classList.add('references');

		if (body) {
			DOMUtils.migrateChildren(body, list);
		}

		// The wiki-level setting is the default; an explicit `responsive`
		// parameter wins, and only the value '0' turns wrapping off.
		const wikiDefault = env.conf.wiki.responsiveReferences.enabled;
		const wantsWrapper = refsOpts.responsive === null ?
			wikiDefault : refsOpts.responsive !== '0';

		let frag = list;
		if (wantsWrapper) {
			frag = doc.createElement('div');
			frag.classList.add('mw-references-wrap');
			frag.appendChild(list);
		}

		if (autoGenerated) {
			DOMDataUtils.addAttributes(frag, {
				typeof: 'mw:Extension/references',
				about: env.newAboutId(),
			});
		}

		const dp = DOMDataUtils.getDataParsoid(frag);
		const group = refsOpts.group;
		// An empty-string group counts as "no group"
		if (group) {
			dp.group = group;
			list.setAttribute('data-mw-group', group);
		}
		if (typeof modifyDp === 'function') {
			modifyDp(dp);
		}

		return frag;
	}

	/**
	 * Register one sealed `<ref>` fragment with `refsData` and build its
	 * `<sup>` linkback. Outside a `<references>` block the linkback replaces
	 * `node`; inside one, its HTML is pushed onto `nestedRefsHTML` instead.
	 *
	 * @param {Element} node The sealed fragment wrapper.
	 * @param {Object} refsData
	 * @param {string} [referencesAboutId] Defined only for refs nested in `<references>`.
	 * @param {string} [referencesGroup] Group of the enclosing `<references>`.
	 * @param {Array} [nestedRefsHTML] Collector for nested linkback HTML.
	 */
	static extractRefFromNode(node, refsData, referencesAboutId, referencesGroup, nestedRefsHTML) {
		const env = refsData.env;
		const doc = node.ownerDocument;
		const nestedInReferences = referencesAboutId !== undefined;

		// data-parsoid of the fragment wrapper, i.e. after dsr computation
		// and template wrapping have run on it.
		const wrapperDp = DOMDataUtils.getDataParsoid(node);
		const typeOf = node.getAttribute('typeof') || '';
		const isTplWrapper = /\bmw:Transclusion\b/.test(typeOf);
		const nodeType = typeOf.replace(/mw:DOMFragment\/sealed\/ref/, '');
		let content = wrapperDp.html;
		const tplDmw = isTplWrapper ? DOMDataUtils.getDataMw(node) : null;

		// The <sup> that carries the actual content of the sealed fragment
		const sup = env.fragmentMap.get(content)[0];
		// Everything that needs loaded data-attributes on `sup` happens
		// between the load and the store below.
		DOMDataUtils.visitAndLoadDataAttribs(sup);
		const supDp = DOMDataUtils.getDataParsoid(sup);
		const refDmw = DOMDataUtils.getDataMw(sup);
		if (!supDp.empty && References.hasRef(sup)) {
			// ref nested inside a ref
			References._processRefs(env, refsData, sup);
		}
		DOMDataUtils.visitAndStoreDataAttribs(sup);

		// The wrapper's about attribute takes priority, since it is only
		// added when the wrapper is a template sibling.
		let about = '';
		if (node.hasAttribute('about')) {
			about = node.getAttribute('about');
		} else if (sup.hasAttribute('about')) {
			about = sup.getAttribute('about');
		}

		// FIXME(SSS): Semantics need clarifying. When both the enclosing
		// <references> and the nested <ref> carry a group attribute, which
		// one should win?
		const group = refDmw.attrs.group || referencesGroup || '';
		const refName = refDmw.attrs.name || '';
		const ref = refsData.add(env, group, refName, about, nestedInReferences);

		// The ref-index linkback
		const linkBack = doc.createElement('sup');

		// FIXME: A lot of wasted work for an edge case
		if (supDp.empty) {
			// No input wikitext, so drop the wrapper
			content = null;
			refDmw.body = supDp.selfClose ? undefined : { html: '' };
		} else {
			// Several <ref>s may share a name but differ in content; only the
			// first one's content is shown in <references>. To round-trip the
			// later ones losslessly, their HTML is recorded inline.
			let html = '';
			let contentDiffers = false;
			if (ref.hasMultiples) {
				// Non-pp serialization: attribs were already stored before
				// the fragment went into the map.
				html = ContentUtils.toXML(sup, { innerXML: true });
				contentDiffers = html !== ref.cachedHtml;
			}
			refDmw.body = contentDiffers ?
				{ html } : { id: 'mw-reference-text-' + ref.target };
		}

		let linkBackId;
		if (!nestedInReferences) {
			linkBackId = ref.name ? ref.linkbacks[ref.linkbacks.length - 1] : ref.id;
		}
		DOMDataUtils.addAttributes(linkBack, {
			about,
			class: 'mw-ref',
			id: linkBackId,
			rel: 'dc:references',
			typeof: nodeType,
		});
		DOMDataUtils.addTypeOf(linkBack, 'mw:Extension/ref');
		DOMDataUtils.setDataParsoid(linkBack, {
			src: wrapperDp.src,
			dsr: wrapperDp.dsr,
			pi: wrapperDp.pi,
		});
		DOMDataUtils.setDataMw(linkBack, isTplWrapper ? tplDmw : refDmw);

		// Anchor pointing at the citation
		const anchor = doc.createElement('a');
		DOMDataUtils.addAttributes(anchor, {
			href: env.page.titleURI + '#' + ref.target,
			style: 'counter-reset: mw-Ref ' + ref.groupIndex + ';',
		});
		if (ref.group) {
			anchor.setAttribute('data-mw-group', ref.group);
		}

		// Fallback text rendering of the cite link for browsers that do not
		// support counters
		const label = doc.createElement('span');
		label.setAttribute('class', 'mw-reflink-text');
		const groupPrefix = ref.group ? ref.group + ' ' : '';
		label.appendChild(doc.createTextNode('[' + groupPrefix + ref.groupIndex + ']'));
		anchor.appendChild(label);
		linkBack.appendChild(anchor);

		if (nestedInReferences) {
			// The node itself is left alone: `insertReferencesIntoDOM`
			// removes it when it clears out all the children.
			nestedRefsHTML.push(ContentUtils.ppToXML(linkBack), '\n');
		} else {
			node.parentNode.replaceChild(linkBack, node);
		}

		// Remember the first content so later same-named <ref>s can be compared.
		if (!ref.content) {
			ref.content = content;
			ref.dir = (refDmw.attrs.dir || '').toLowerCase();
		}
	}

	/**
	 * Finalize a references node: update its data-mw (unless it is a
	 * template wrapper), drop its existing children, render one line per
	 * ref of its group, and remove that group from `refsData`.
	 *
	 * @param {Element} refsNode The list, or its responsive wrapper.
	 * @param {Object} refsData
	 * @param {Array} nestedRefsHTML HTML of linkbacks found inside the tag.
	 * @param {boolean} [autoGenerated]
	 */
	static insertReferencesIntoDOM(refsNode, refsData, nestedRefsHTML, autoGenerated) {
		const env = refsData.env;
		const isTplWrapper = /\bmw:Transclusion\b/.test(refsNode.getAttribute('typeof') || '');
		const dp = DOMDataUtils.getDataParsoid(refsNode);
		const group = dp.group || '';

		if (!isTplWrapper) {
			let dataMw = DOMDataUtils.getDataMw(refsNode);
			if (Object.keys(dataMw).length === 0) {
				// FIXME: This could live in `insertMissingReferencesIntoDOM`
				console.assert(autoGenerated);
				dataMw = {
					name: 'references',
					attrs: {
						// Avoid emitting an empty key
						group: group || undefined,
					},
				};
				DOMDataUtils.setDataMw(refsNode, dataMw);
			}

			if (autoGenerated) {
				// Flagged so html -> wt can skip it and clients can strip it
				// if necessary.
				dataMw.autoGenerated = true;
			} else if (nestedRefsHTML.length > 0) {
				dataMw.body = { html: '\n' + nestedRefsHTML.join('') };
			} else {
				dataMw.body = dp.selfClose ? undefined : { html: '' };
			}
			dp.selfClose = undefined;
		}

		const refGroup = refsData.getRefGroup(group);

		// With a responsive wrapper, the list is the wrapper's first child
		let listNode = refsNode;
		if (refsNode.classList.contains('mw-references-wrap')) {
			const threshold = env.conf.wiki.responsiveReferences.threshold;
			if (refGroup && refGroup.refs.length > threshold) {
				refsNode.classList.add('mw-references-columns');
			}
			listNode = refsNode.firstChild;
		}

		// Empty the list before filling it.
		//
		// Ex: a {{Reflist}} reused from the cache arrives with its old
		// references, which must go before fresh ones are generated.
		while (listNode.firstChild) {
			listNode.removeChild(listNode.firstChild);
		}

		if (refGroup) {
			refGroup.refs.forEach((ref) => refGroup.renderLine(env, listNode, ref));
		}

		// This group is done
		refsData.removeRefGroup(group);
	}

	/**
	 * Process `<ref>`s left behind after the DOM is fully processed, as if
	 * there were an implicit `<references />` tag at the end of the DOM.
	 * One auto-generated list is appended per remaining group.
	 *
	 * @param {Object} refsData
	 * @param {Node} node Container the generated lists are appended to.
	 */
	static insertMissingReferencesIntoDOM(refsData, node) {
		const env = refsData.env;
		const doc = node.ownerDocument;

		refsData.refGroups.forEach((refsValue, refsGroup) => {
			// These lists come out of "nowhere", so for selser to work
			// properly they get a zero-sized DSR at the end of the document.
			const pinToDocEnd = (dp) => {
				const end = env.page.src.length;
				dp.dsr = [end, end, 0, 0];
			};
			const frag = References.createReferences(
				env, doc, null, { group: refsGroup, responsive: null }, pinToDocEnd, true
			);

			// A leading \n so that, when serialized to wikitext, each
			// <references /> tag sits on its own line.
			node.appendChild(doc.createTextNode('\n'));
			node.appendChild(frag);

			References.insertReferencesIntoDOM(frag, refsData, [''], true);
		});
	}

	/**
	 * Linter hook; currently does no linting and just moves on to the
	 * next sibling.
	 *
	 * FIXME: That is not quite right when the <ref> tags are defined in a
	 * references section that is itself templated, e.g.
	 *
	 * {{1x|<references>\n<ref name='x'><b>foo</ref>\n</references>}}
	 *
	 * Here the references tag has the right tplInfo, but when the <ref> is
	 * processed in the article body where it is used, no relevant template
	 * or dsr info is available. Ignored for now.
	 */
	static lintHandler(refs, env, tplInfo, domLinter) {
		return refs.nextSibling;
	}

	/**
	 * Walk the children of `node`, extracting sealed refs, finalizing
	 * `<references>` nodes, and descending into inline-media captions and
	 * any other element with children.
	 *
	 * @param {Object} env
	 * @param {Object} refsData
	 * @param {Node} node
	 */
	static _processRefs(env, refsData, node) {
		let upcoming;
		for (let cur = node.firstChild; cur !== null; cur = upcoming) {
			// Grab the sibling first: extracting a ref replaces `cur` in place
			upcoming = cur.nextSibling;
			if (!DOMUtils.isElt(cur)) {
				continue;
			}

			if (WTUtils.isSealedFragmentOfType(cur, 'ref')) {
				References.extractRefFromNode(cur, refsData);
				continue;
			}

			const typeOf = cur.getAttribute('typeof') || '';
			if ((/(?:^|\s)mw:Extension\/references(?=$|\s)/).test(typeOf)) {
				const referencesId = cur.getAttribute('about') || '';
				const referencesGroup = DOMDataUtils.getDataParsoid(cur).group;
				const nestedRefsHTML = [];
				References._processRefsInReferences(refsData, cur, referencesId, referencesGroup, nestedRefsHTML);
				References.insertReferencesIntoDOM(cur, refsData, nestedRefsHTML);
				continue;
			}

			// Inline media keeps its caption as HTML inside data-mw
			if (WTUtils.isInlineMedia(cur)) {
				/* -----------------------------------------------------------------
				 * FIXME(subbu): This works, but it is special-cased twice over:
				 *
				 * 1. to images, rather than any node that may carry serialized
				 *    HTML in data-mw
				 * 2. to global cite handling, whereas the general scenario is
				 *    DOM post-processors that behave differently at the
				 *    top level than elsewhere.
				 *    - Cite has to process these fragments in the context of
				 *      the top-level page, in the order the nodes are
				 *      encountered.
				 *    - DOM cleanup can run on embedded fragments without any
				 *      page-level context, in any order.
				 *    - So there is some variability here.
				 *
				 * dom.cleanup.js passes ought to run on HTML embedded in
				 * data-mw and other attributes. Correctness does not depend
				 * on that cleanup, so no further special-casing is added to
				 * dom.cleanup.js.
				 *
				 * A more generic solution needs a DOMProcessor class that
				 * carries state.
				 *
				 * See T214994
				 * ----------------------------------------------------------------- */
				const dmw = DOMDataUtils.getDataMw(cur);
				const caption = dmw.caption;
				if (caption) {
					// caption HTML -> DOM -> process refs -> back to HTML
					const captionDOM = ContentUtils.ppToDOM(env, caption);
					References._processRefs(env, refsData, captionDOM);
					dmw.caption = ContentUtils.ppToXML(captionDOM, { innerXML: true });
				}
			}
			if (cur.hasChildNodes()) {
				References._processRefs(env, refsData, cur);
			}
		}
	}

	/**
	 * Extract the refs nested inside a `<references>` node. This handles
	 * wikitext like this:
	 * ```
	 *   <references> <ref>foo</ref> </references>
	 *   <references> <ref>bar</ref> </references>
	 * ```
	 * @private
	 */
	static _processRefsInReferences(refsData, node, referencesId, referencesGroup, nestedRefsHTML) {
		let upcoming;
		for (let cur = node.firstChild; cur !== null; cur = upcoming) {
			upcoming = cur.nextSibling;
			if (!DOMUtils.isElt(cur)) {
				continue;
			}
			if (WTUtils.isSealedFragmentOfType(cur, 'ref')) {
				References.extractRefFromNode(cur, refsData, referencesId, referencesGroup, nestedRefsHTML);
			} else if (cur.hasChildNodes()) {
				References._processRefsInReferences(refsData, cur, referencesId, referencesGroup, nestedRefsHTML);
			}
		}
	}
}

// html -> wikitext serialization hooks for a references node.
References.serialHandler = {
	/**
	 * Serialize a references node back to wikitext.
	 *
	 * Resolves to '' for auto-generated nodes in rt-test mode and when the
	 * body is present without string HTML (an error is logged in that case);
	 * to the start tag alone when data-mw has no body; otherwise to the
	 * start tag, the serialized body HTML, and the closing tag.
	 */
	handle: Promise.async(function *(node, state, wrapperUnmodified) {
		const dataMw = DOMDataUtils.getDataMw(node);

		// Keep auto-inserted <references /> noise out of rt-testing
		if (dataMw.autoGenerated && state.rtTestMode) {
			return '';
		}

		const startTagSrc = yield state.serializer.serializeExtensionStartTag(node, state);

		// No body: the tag was already self-closed
		if (!dataMw.body) {
			return startTagSrc;
		}

		if (typeof dataMw.body.html !== 'string') {
			state.env.log('error',
				'References body unavailable for: ' + node.outerHTML);
			// Nothing usable to serialize, so drop the node
			return '';
		}

		const bodySrc = yield state.serializer.serializeHTML({
			env: state.env,
			extName: dataMw.name,
		}, dataMw.body.html);
		return startTagSrc + bodySrc + '</' + dataMw.name + '>';
	}),

	// FIXME: LEAKY -- Should newline constraints be exposed to extensions?
	/**
	 * Newline constraints before the node: a newly added references tag
	 * goes on a new line; any other node gets no constraint (null).
	 */
	before: function(node, otherNode, state) {
		return WTUtils.isNewElt(node) ? { min: 1, max: 2 } : null;
	},
};

module.exports = References;