mediawiki-extensions-Visual.../modules/parser/mediawiki.DOMPostProcessor.js
Gabriel Wicke 8368e17d6a Biggish token transform system refactoring
* All parser pipelines including tokenizer and DOM stuff are now constructed
  from a 'recipe' data structure in a ParserPipelineFactory.

* All sub-pipelines of these can now be cached

* Event registrations to a pipeline are directly forwarded to the last
  pipeline member to save relatively expensive event forwarding.

* Some APIs for on-demand expansion / format conversion of parameters from
  parser functions are added:

  param.to('tokens/expanded', cb)
  param.to('text/wiki', cb) (this does not work yet)

  All parameters are additionally wrapped into a Param object that provides
  methods for positional parameter naming (.named()) or conversion to a dict
  (.dict()).

* The async token transform manager is now separated from a frame object, with
  the frame holding arguments, an on-demand expansion method and loop checks.

* Only keys of template parameters are now expanded. Parser functions or
  template arguments trigger an expansion on-demand. This (unsurprisingly)
  makes a big performance difference with typical switch-heavy template
  systems.

* Return values from async transforms are no longer used in favor of plain
  callbacks. This saves the complication of having to maintain two code paths.
  A trick in transformTokens still avoids the construction of unneeded
  TokenAccumulators.

* The results of template expansions are no longer buffered.

* 301 parser tests are passing

Known issues:

* Cosmetic cleanup remains to do
* Some parser functions do not support async expansions yet, and need to be
  modified.

Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 16:51:36 +02:00

83 lines
2.2 KiB
JavaScript

/* Perform post-processing steps on an already-built HTML DOM. */
var events = require('events'),
util = require('./ext.Util.js'),
Util = new util.Util();
// Quick HACK: local stand-in for the DOM nodeType constants used in this file.
// Values follow https://developer.mozilla.org/en/nodeType
var Node = {
	// nodeType of a text node
	TEXT_NODE: 3,
	// nodeType of a comment node
	COMMENT_NODE: 8
};
/**
 * Tell whether a node's `data` consists solely of spaces, carriage returns,
 * newlines and tabs (an empty string counts as whitespace too).
 *
 * @param {Object} e Node-like object with a string `data` property.
 * @returns {boolean} True when `data` holds nothing but whitespace.
 */
var isElementContentWhitespace = function ( e ) {
	var whitespaceOnly = /^[ \r\n\t]*$/;
	// The pattern is anchored at the start, so a match can only be at index 0.
	return e.data.search( whitespaceOnly ) === 0;
};
/**
 * Wrap all top-level inline content of the document body in paragraphs.
 *
 * Consecutive inline children of <body> are moved into a fresh <p>, which is
 * put back at the position where the run ended. A run starts at a
 * non-whitespace text node or a non-block element; once a run is open, text
 * and comment nodes are pulled into it as well. Any other node ends the run
 * and stays where it is.
 *
 * This should also be applied inside block-level elements, but in that case
 * the first paragraph usually remains plain inline.
 *
 * @param {Object} document DOM document whose body is modified in place.
 */
var process_inlines_in_p = function ( document ) {
	var body = document.body,
		paragraph = document.createElement('p'),
		children = body.childNodes,
		total = children.length,
		open = false,
		// Net number of nodes removed from the live child list so far; used to
		// translate the original position into the current index.
		shift = 0,
		node,
		wrap;

	for ( var pos = 0; pos < total; pos++ ) {
		node = children[pos - shift];

		switch ( node.nodeType ) {
			case Node.TEXT_NODE:
				// Whitespace alone never starts a paragraph
				wrap = open || !isElementContentWhitespace( node );
				break;
			case Node.COMMENT_NODE:
				// Comments only join a paragraph that is already open
				wrap = open;
				break;
			default:
				wrap = !Util.isBlockTag( node.nodeName.toLowerCase() );
		}

		if ( wrap ) {
			// Moving the node into the paragraph removes it from body
			paragraph.appendChild( node );
			open = true;
			shift++;
			continue;
		}

		if ( open ) {
			// Close the run: the paragraph takes one slot in front of node
			body.insertBefore( paragraph, node );
			shift--;
			paragraph = document.createElement('p');
			open = false;
		}
	}

	// Flush a run that extends to the end of the body
	if ( open ) {
		body.appendChild( paragraph );
	}
};
/**
 * Post-processor for an already-built HTML DOM.
 *
 * Holds a list of processor functions in `this.processors`; each one is
 * called with the document before the document is re-emitted through this
 * object's own 'document' event.
 *
 * @constructor
 */
function DOMPostProcessor () {
	// Set up per-instance EventEmitter state. Without this call, and depending
	// on the Node version, the listener table can live on the shared prototype,
	// so a listener added to one post-processor fires for every instance.
	events.EventEmitter.call( this );
	this.processors = [process_inlines_in_p];
}

// Inherit from EventEmitter without constructing a shared emitter instance
DOMPostProcessor.prototype = Object.create( events.EventEmitter.prototype );
DOMPostProcessor.prototype.constructor = DOMPostProcessor;

/**
 * Run every registered processor over the document, in order, then emit the
 * 'document' event with that same document.
 *
 * @param {Object} document The DOM document to post-process.
 */
DOMPostProcessor.prototype.doPostProcess = function ( document ) {
	for ( var i = 0; i < this.processors.length; i++ ) {
		this.processors[i]( document );
	}
	this.emit( 'document', document );
};

/**
 * Register for the 'document' event, normally emitted from the HTML5 tree
 * builder.
 *
 * @param {Object} emitter Event source providing addListener().
 */
DOMPostProcessor.prototype.addListenersOn = function ( emitter ) {
	emitter.addListener( 'document', this.doPostProcess.bind( this ) );
};
// Publish the constructor when a `module` object is available (as in node.js)
if ( typeof module === 'object' ) {
	module.exports.DOMPostProcessor = DOMPostProcessor;
}